diff --git a/data/fieldlist_CMIP.jsonc b/data/fieldlist_CMIP.jsonc index 19417846c..63a894c5b 100644 --- a/data/fieldlist_CMIP.jsonc +++ b/data/fieldlist_CMIP.jsonc @@ -392,6 +392,12 @@ "units": "degC", "ndim": 4 }, + "sftlf": { + "standard_name": "land_area_fraction", + "realm": "atmos", + "units": "%", + "ndim": 2 + }, // Variables for Convective Transition Diagnostics module: // ta: 3D temperature, units = K: "ta": { diff --git a/diagnostics/WWEs/WWE_diag_tools.py b/diagnostics/WWEs/WWE_diag_tools.py new file mode 100644 index 000000000..0609de1d0 --- /dev/null +++ b/diagnostics/WWEs/WWE_diag_tools.py @@ -0,0 +1,506 @@ +import numpy as np +import xarray as xr +import os +import xesmf as xe +import matplotlib.pyplot as plt +from scipy.ndimage import ( + label, + find_objects, + sum as ndi_sum, + mean as ndi_mean, + center_of_mass) +from scipy import stats +import scipy + +def low_pass_weights(window, cutoff): + """Calculate weights for a low pass Lanczos filter. + Args: + window: int + The length of the filter window. + cutoff: float + The cutoff frequency in inverse time steps. + From: https://github.com/liv0505/Lanczos-Filter/blob/master/lanczosbp.py + """ + order = ((window - 1) // 2 ) + 1 + nwts = 2 * order + 1 + w = np.zeros([nwts]) + n = nwts // 2 + w[n] = 2 * cutoff + k = np.arange(1., n) + sigma = np.sin(np.pi * k / n) * n / (np.pi * k) + + firstfactor = np.sin(2. 
* np.pi * cutoff * k) / (np.pi * k) + w[n-1:0:-1] = firstfactor * sigma + w[n+1:-1] = firstfactor * sigma + + return w[1:-1] + +def filter_data(data = None, nweights = 201, a = 5): + weights = low_pass_weights(nweights, 1./a) + + weight_array = xr.DataArray(weights, dims = ['window']) + + #apply the filters using the rolling methos with the weights + filtered_data = data.rolling(time = len(weights), center = True).construct('window').dot(weight_array) + + return filtered_data + +def land_mask_using_etopo(ds = None, topo_latgrid_1D = None, topo_longrid_1D = None, + topo_data1D = None, lf_cutoff = 10): + + half_xbin_size = float((ds.lon - np.roll(ds.lon, 1))[1:].mean()/2) + half_ybin_size = float((ds.lat - np.roll(ds.lat, 1))[1:].mean()/2) + + first_xval = float((ds.lon[0] - half_xbin_size).values) + xbin_edges = np.asarray(ds.lon + half_xbin_size) + xbin_edges = np.insert(xbin_edges, 0, first_xval) + + first_yval = float((ds.lat[0] - half_ybin_size).values) + ybin_edges = np.asarray(ds.lat + half_ybin_size) + ybin_edges = np.insert(ybin_edges, 0, first_yval) + + bin_topo = stats.binned_statistic_2d(topo_latgrid_1D, topo_longrid_1D, topo_data1D, + statistic = lambda topo_data1D : np.count_nonzero(topo_data1D >= 0)/topo_data1D.size, + bins=[ybin_edges, xbin_edges], expand_binnumbers = True) + + lf_vals = bin_topo.statistic * 100. 
+ ls_mask = (lf_vals < lf_cutoff)*1 + + return ls_mask + +def regridder_model2obs(lon_vals = None, lat_vals = None, in_data = None, type_name = 'bilinear', isperiodic = True): + out_grid = xr.Dataset( + { + "lat": (["lat"], lat_vals), + "lon": (["lon"], lon_vals) + }) + + regridder = xe.Regridder(in_data, out_grid, method = type_name, periodic = isperiodic) + + return regridder + +#Used for removing the first n harmonics from the seasonal cycle +def nharm(x, N): + if x.any()==0: + print('here') + return np.zeros(N) + fft_output = scipy.fft.fft(x) + freq = scipy.fft.fftfreq(len(x), d = 1) + filtered_fft_output = np.array([fft_output[i] if round(np.abs(1/f),2) in\ + [round(j,2) for j in [N, N/2, N/3]] else 0 for i, f in enumerate(freq)]) + #,N/2,N/3 + filtered_sig = scipy.fft.ifft(filtered_fft_output) + filtered = filtered_sig.real + + return filtered + +""""" +Explanation of bb_sizes used below: +followed - https://stackoverflow.com/questions/36200763/objects-sizes-along-dimension + +"for object_slice in bb_slices" loops through each set of labeled_wwb slices +Example: +for object_slice in bb_slices: + print(object_slice) + +(slice(110, 112, None), slice(0, 1, None), slice(31, 35, None)) +(slice(111, 114, None), slice(0, 1, None), slice(19, 27, None)) +(slice(127, 130, None), slice(0, 1, None), slice(12, 21, None)) +etc. + +----------------------------------------------------------------------- +"s.stop-s.start for s in object_slice" + +Example: +for s in bb_slices[50]: + print(s.stop - s.start) + +3, 1, 13 +since the bb_slices[50] = (slice(644, 647, None), slice(0, 1, None), slice(85, 98, None)) + +----------------------------------------------------------------------- +Note, for IWW: +The 0th blob (i.e., blob that has array index of 0) has 1 for a label. +So, when indexing the labels need to add 1. (i.e., the slices for blob #17, +which is the first blob that qualifies as a WWB, are +(slice(295, 302, None), slice(0, 1, None), slice(15, 42, None)). 
These slices +correspond to the blob labeled with and 18. print(labeled_blobs[bb_slices[iblob]]) +returns values of 0 and 18 since the slice includes the bounding box of the blob + +""""" +def isolate_WWEs(data = None, tauu_thresh = 0.04, mintime = 5, + minlons = 10, xmin = 3, xmax = 3, ymin = 3, + ymax = 3, xtend_past_lon = 140): + + lon_array = np.asarray(data["lon"]) + + # 1) Create mask for tauu > tauu_thresh (default is 0.04 following Puy et al. (2016)) + tauu_mask = data.where(data > tauu_thresh, 0) #Assign elements with tauu < tauu_thresh zero + tauu_mask = tauu_mask.where(tauu_mask == 0, 1) #Assign elements with tauu > tauu_thresh one + + # 2) Find tauu blobs: + #assign each contiguous region of tauu > 0.04 a unique number using the mask generated above + labeled_blobs, n_blobs = label(tauu_mask) + + #Find the bounding box of each wwb feature + bb_slices = find_objects(labeled_blobs) + + # 3) Find the size of the bounding box for each wwb feature, + #Explanation give above + bb_sizes = np.array([[s.stop-s.start for s in object_slice] + for object_slice in bb_slices]) + + # 4) Find where blobs last at least X days and for X° of longitude + time_index = np.where(np.asarray(data.dims) == 'time') + lon_index = np.where(np.asarray(data.dims) == 'lon') + + w_wwe = np.where((bb_sizes[:, time_index] >= mintime) & (bb_sizes[:, lon_index] >= minlons)) + n_wwe = np.count_nonzero((bb_sizes[:, time_index] >= mintime) & + (bb_sizes[:, lon_index] >= minlons)) + #w_wwe_array = np.asarray(w_wwe) + + # 5) Make a mask of only the blobs that qualify as WWEs + #Create wwe_mask array to be filled + wwe_mask = np.zeros_like(labeled_blobs) + + #Loop through blobs that satisfiy intensity, duration, and zonal length criteria + for i in range(len(w_wwe[0])): + #w_temp = np.where(flat_lab_blobs == w_wwe_array[0][i]+1) + w_temp = np.where(labeled_blobs == w_wwe[0][i]+1) + wwe_mask[w_temp] = 1 + + # 6) Label WWEs + labeled_wwes, n_wwes = label(wwe_mask) + + # 7) Loop over all WWEs to 
see if any of them are close enough < 3 days and < 3° to count as same event + wwes_after_merging = find_nearby_wwes_merge(n_wwes = n_wwes, labeled_wwes = labeled_wwes, + xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax) + + # 8)Renumber the labels from 1 to nWWEs: + #At this point some of the WWE labels have been eliminated because they were merged, + #so need to renumber the labels from 1 to nWWEs + new_wwe_labels = renumber_wwes(wwe_labels = wwes_after_merging) + + # 9) Check to see if the WWE extends beyond 140°E + # Find the bounding box of each wwe feature + bb_slices = find_objects(new_wwe_labels) + + #Find max longitudes for each WWE + #The -1 for finding the indices is because the bb_slice gives the slice values and the stop value + #is not inclusive in python (e.g., array[3:10] includes elements 3-9 and NOT 10) + max_time_lon_ind = np.array([[s.stop for s in object_slice] + for object_slice in bb_slices]) + max_lon_inds = max_time_lon_ind[:, lon_index[0][0]] -1 + max_lons = lon_array[max_lon_inds] + + #Find min longitudes for each WWE + min_time_lon_ind = np.array([[s.start for s in object_slice] + for object_slice in bb_slices]) + min_lon_inds = min_time_lon_ind[:, lon_index[0][0]] -1 + min_lons = lon_array[min_lon_inds] + + wwe_labels_count = np.unique(new_wwe_labels)[1:] + wwe_maxlonsLTX = np.where(max_lons < xtend_past_lon) + n_wwe_maxlonsLTX = np.count_nonzero(max_lons < xtend_past_lon) + + # 10) Remove the WWE that isn't long enough + if n_wwe_maxlonsLTX > 0: + for i2remove in range(n_wwe_maxlonsLTX): + wwe2remove = wwe_labels_count[wwe_maxlonsLTX[0][i2remove]] + + wwe_labels_afterRemoveX = np.where(new_wwe_labels == wwe2remove, 0, new_wwe_labels) + + min_lons = np.delete(min_lons, wwe_maxlonsLTX) + max_lons = np.delete(max_lons, wwe_maxlonsLTX) + + # 11) Relabel WWE again becasue now more WWEs have potentially been removed + wwe_labels_final = renumber_wwes(wwe_labels = wwe_labels_afterRemoveX) + + # 12) Final mask array + wwe_mask_final = 
np.where(wwe_labels_final !=0, 1, 0) + + else: + wwe_labels_final = new_wwe_labels + wwe_mask_final = wwe_mask + + return wwe_labels_final, wwe_mask_final + +def find_nearby_wwes_merge(n_wwes = 0, labeled_wwes = None, xmin = 3, xmax = 3, + ymin = 3, ymax = 3): + + for i in range(1, n_wwes): + wblob = np.where(labeled_wwes == i) + npts = np.asarray(wblob).shape[1] + + #Loop over each point in WWE and check surrounding 3 days and 3° for another labeled WWE + for ipt in range(npts): + if len(wblob) == 3: + check_wwe_label = labeled_wwes[wblob[0][ipt] - xmin : wblob[0][ipt] + xmax + 1, + 0, + wblob[2][ipt] - ymin : wblob[2][ipt] + ymax + 1] + if len(wblob) == 2: + check_wwe_label = labeled_wwes[wblob[0][ipt] - xmin : wblob[0][ipt] + xmax + 1, + wblob[1][ipt] - ymin : wblob[1][ipt] + ymax + 1] + + n_diff_label = np.count_nonzero((check_wwe_label != i) & (check_wwe_label != 0)) + + #Replace nearby WWE label with earlier in time and/or space WWE, so the later WWE becomes + #a part of the smaller numbered WWE + if n_diff_label > 0: + w_diff_label = np.where((check_wwe_label != i) & (check_wwe_label != 0)) + unique_overlapping_wwes = np.unique(check_wwe_label[w_diff_label]) + + for ioverlapwwe in range(unique_overlapping_wwes.size): + w_ioverlapwwe = np.where(labeled_wwes == unique_overlapping_wwes[ioverlapwwe]) + labeled_wwes[w_ioverlapwwe] = i + + return labeled_wwes + +def renumber_wwes(wwe_labels = None): + uniq_labels0 = np.unique(wwe_labels) + + #Remove the 0 label as it's not a WWE + uniq_labels = uniq_labels0[~(uniq_labels0 == 0)] + new_wwe_labels = np.zeros_like(wwe_labels) + + #Re-label wwes 0-nWWEs, so that find_object works correctly + jj = 1 + + for iwwe in range(len(uniq_labels)): + w_wwe_label = np.where(wwe_labels == uniq_labels[iwwe]) + new_wwe_labels[w_wwe_label] = jj + jj += 1 + + return new_wwe_labels + +def WWE_characteristics(wwe_labels = None, data = None): + + #Find the bounding box of each WEE feature + bb_slices = find_objects(wwe_labels) + + #Find 
the size of the bounding box for each wwb feature, + #Explanation give above + object_sizes = np.array([[s.stop-s.start for s in object_slice] + for object_slice in bb_slices]) + + #Find the duration, length, and integrated wind work (IWW) of each WWE + time_index = np.where(np.asarray(data.dims) == 'time') + lon_index = np.where(np.asarray(data.dims) == 'lon') + + duration = object_sizes[:, time_index[0][0]] + zonal_extent = object_sizes[:, lon_index[0][0]] + + array_label_values = np.arange(len(bb_slices))+1 + wwe_sum = ndi_sum(np.asarray(data), wwe_labels, array_label_values) + wwe_mean = ndi_mean(np.asarray(data), wwe_labels, array_label_values) + + return duration, zonal_extent, wwe_sum, wwe_mean + +def find_WWE_time_lon(data = None, wwe_labels = None, lon = None, time_array = None): + + bb_slices = find_objects(wwe_labels) + + time_index = np.where(np.asarray(data.dims) == 'time') + lon_index = np.where(np.asarray(data.dims) == 'lon') + + uniq_labels = np.unique(wwe_labels)[1:] + time_lon_center = center_of_mass(np.asarray(data), wwe_labels, uniq_labels) + + #Use zip to extract the first & second element of + #each tuple in the time_lon_center list + cent_time_ind = list(zip(*time_lon_center))[int(time_index[0])] + cent_lon_ind = np.asarray(list(zip(*time_lon_center))[int(lon_index[0])]) + + #3/15/2023: round time to nearest day. I doubt the SSH data will + #be finer than a day, so rounding to a day seems sufficient. 
+ cent_time_ind = np.round(cent_time_ind).astype("int") + + lower_lon_val = lon[np.floor(cent_lon_ind).astype("int")] + upper_lon_val = lon[np.ceil(cent_lon_ind).astype("int")] + + #Make sure longitude delta is only 1degree + delta_lon = upper_lon_val - lower_lon_val + if np.count_nonzero(delta_lon != 1) > 0: + print("Longitude delta GT 1degree") + + #Interpolate the longitude values using interpolation equation + # y = y1 + (y2 - y1) * [(x - x1)/(x2 - x1)] + # y(s) in this case are the lon values and x(s) are the lon indicies + # since the longitudes and indicies increment by one the euqtion + # reduces to y = y1 + (x - x1) + center_lon_vals = lower_lon_val + (cent_lon_ind - np.floor(cent_lon_ind)) + center_time_vals= time_array[cent_time_ind] + + #Added 4/25/2024 + #Find max time for each WWE' + #The -1 for finding the indices is because the bb_slice gives the slice values and the stop value + #is not inclusive in python (e.g., array[3:10] includes elements 3-9 and NOT 10) + max_time_lon_ind = np.array([[s.stop for s in object_slice] + for object_slice in bb_slices]) + max_time_inds = max_time_lon_ind[:, time_index[0][0]] - 1 + max_times = time_array[max_time_inds] + + #Find min time for each WWE + min_time_lon_ind = np.array([[s.start for s in object_slice] + for object_slice in bb_slices]) + min_time_inds = min_time_lon_ind[:, time_index[0][0]] + min_times = time_array[min_time_inds] + + #Find max longitudes for each WWE + max_time_lon_ind = np.array([[s.stop for s in object_slice] + for object_slice in bb_slices]) + max_lon_inds = max_time_lon_ind[:, lon_index[0][0]] - 1 + max_lons = lon[max_lon_inds] + + #Find min longitudes for each WWE + min_time_lon_ind = np.array([[s.start for s in object_slice] + for object_slice in bb_slices]) + min_lon_inds = min_time_lon_ind[:, lon_index[0][0]] + min_lons = lon[min_lon_inds] + + return center_lon_vals, center_time_vals, min_times, max_times, min_lons, max_lons + + +def events_per_lon(in_da = None): + + nevents = 
np.unique(in_da)[1:].size #Don't count zero + event_nums = np.unique(in_da)[1:] + + event_lon_mask = np.zeros(nevents*in_da.lon.size).reshape(nevents, in_da.lon.size) + var_shape_list = in_da.shape + + for ievent in range(nevents): + lon_axis = var_shape_list.index(in_da.lon.shape[0]) + temp_lons = np.zeros(in_da.lon.size) + w = np.where(in_da == event_nums[ievent]) + uniq_wlons= np.unique(w[lon_axis]) + temp_lons[uniq_wlons] = 1 + event_lon_mask[ievent] = temp_lons + + count_event_lons = np.sum(event_lon_mask, axis = 0) + + return count_event_lons, nevents + +##################################################################### +###PLOTTING CODE +##################################################################### +def plot_model_Hovmollers_by_year(data = None, wwe_mask = None, lon_vals = None, + tauu_time = None, savename = '', + start_date = '', end_date = ''): + + year_array = np.unique(tauu_time.dt.year) + nyears = np.unique(tauu_time.dt.year).size + + fig, ax = plt.subplots(ncols=5, nrows=4, figsize = (15, 16), sharex = True, sharey = True) + axlist = ax.flatten() + shade_choice = 'bwr' + levs = np.linspace(-0.1, 0.1, 21) + + kwargs = {'fontsize':12} + #################################################################################### + #Loop through each year to make a Hovmoller panel of filtered zonal wind stress + #for each year overlapped with WWE blobs + #################################################################################### + for iyear in range(20): + wiyear = np.where((np.asarray(tauu_time.dt.year) == year_array[iyear])) + + ######################################################################## + #Plot details + ########################################################################= + cf = axlist[iyear].contourf(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size), + np.asarray(data[wiyear[0], :]), levels = levs, + cmap = shade_choice, extend = 'both') + + cl = axlist[iyear].contour(np.asarray(lon_vals), np.arange(0, 
tauu_time[wiyear[0]].size), + wwe_mask[wiyear[0], :], cmap = 'binary', linewidths = 1) + + axlist[iyear].grid(alpha = 0.5) + + if iyear >=15 :axlist[iyear].set_xlabel('longitude', **kwargs) + if iyear%5 == 0: axlist[iyear].set_ylabel('day of year', **kwargs) + axlist[iyear].set_title(str(year_array[iyear]), fontsize=12, loc = 'left') + axlist[iyear].tick_params(axis='y', labelsize=12) + axlist[iyear].tick_params(axis='x', labelsize=12) + plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9) + + cbar_ax = fig.add_axes([0.81, 0.35, 0.015, 0.3]) + cbar_ax.tick_params(labelsize=12) + cb = plt.colorbar(cf, cax=cbar_ax) + cb.set_label(label = '$\u03C4_x$ (N $m^{-2}$)', fontsize = 12) + + plt.savefig(savename + '.' + start_date + '.YearlyHovmollers.png', bbox_inches='tight') + + if year_array.size > 20: + fig, ax = plt.subplots(ncols=5, nrows=4, figsize = (15, 16), sharex = True, sharey = True) + axlist = ax.flatten() + + for iyear in range(year_array.size - 20): + wiyear = np.where((np.asarray(tauu_time.dt.year) == year_array[iyear + 20])) + + #################################################################### + #Plot details + ######################################################################## + cf = axlist[iyear].contourf(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size), + np.asarray(data[wiyear[0], :]), levels = levs, + cmap = shade_choice, extend = 'both') + + cl = axlist[iyear].contour(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size), + wwe_mask[wiyear[0], :], cmap = 'binary', linewidths = 1) + + axlist[iyear].grid(alpha = 0.5) + + if iyear >=15 :axlist[iyear].set_xlabel('longitude', **kwargs) + if iyear%5 == 0: axlist[iyear].set_ylabel('day of year', **kwargs) + axlist[iyear].set_title(str(year_array[iyear + 20]), fontsize=12, loc = 'left') + axlist[iyear].tick_params(axis='y', labelsize=12) + axlist[iyear].tick_params(axis='x', labelsize=12) + plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9) + + cbar_ax = fig.add_axes([0.81, 0.35, 
0.015, 0.3]) + cbar_ax.tick_params(labelsize=12) + cb = plt.colorbar(cf, cax=cbar_ax) + cb.set_label(label = '$\u03C4_x$ (N $m^{-2}$)', fontsize = 12) + + #start2ndpage = str(int(first_year) + 20) + plt.savefig(savename + '.' + end_date + '.YearlyHovmollers.png', bbox_inches='tight') + + return cf + +def plot_WWE_likelihood_per_lon(lons = None, model_prop_per_day = None, + obs_prop_per_day = None, savepath = '', + model_name = ''): + + #Set the plot + fig, ax = plt.subplots(figsize=(6, 4)) + + #Model simulation + cf = ax.plot(lons, model_prop_per_day) + cf = ax.fill_between(lons, model_prop_per_day*0, model_prop_per_day, alpha=0.9, label = model_name) + + #Observations + cf2 = ax.plot(lons, obs_prop_per_day, color = 'gray') + cf2 = ax.fill_between(lons, obs_prop_per_day*0, obs_prop_per_day, color = 'gray', alpha = 0.7, label = 'TropFlux observations') + + #Format information + ax.legend(fontsize = 12) + ax.set_title(model_name, fontsize = 14) + ax.set_xlabel('longitude', fontsize = 14) + ax.set_ylabel('Probability per day (%)', fontsize = 14) + ax.tick_params(axis='y', labelsize=14) + ax.tick_params(axis='x', labelsize=14) + ax.set_ylim(0, 1.8) + ax.grid(alpha = 0.5) + + #Add second axis + ax2 = ax.twinx() + ytick_vals = ax.get_yticks() + ytick_vals[-1] = 1.8 + label = np.around(100/ytick_vals, decimals = 1) + label = ['inf', '200.0', '100.0', '66.7', ''] + ax2.set_yticks(ytick_vals) + ax2.set_yticklabels(label, fontsize = 14) + ax2.set_xlabel('Longitude', fontsize = 14) + ax2.set_ylabel('Rate of return (days)', fontsize = 14) + plt.savefig(savepath + model_name + "_and_TropFlux_WWE_prob_per_day.png", bbox_inches='tight') + + return cf diff --git a/diagnostics/WWEs/WWEs.html b/diagnostics/WWEs/WWEs.html new file mode 100644 index 000000000..0e2379e2d --- /dev/null +++ b/diagnostics/WWEs/WWEs.html @@ -0,0 +1,78 @@ + + +Westerly wind events + +

Westerly wind events

+

+Westerly wind events (WWEs) are anomalously strong, long lasting westerlies +that predominantly occur over the west Pacific Ocean. They are capable +of exciting oceanic Kelvin waves, which in turn warm the ocean surface +and deepen the thermocline as the wave propagates eastward. These +WWE-Kelvin wave interactions can therefore play an important role in +ENSO variability and it is important to understand the fidelity of +WWEs and Kelvin waves in earth system models relative to observations. +

+

+This POD identifies WWEs and their characteristics across the Pacific
+Ocean (120E-280E) using daily equatorially averaged (2.5S - 2.5N)
+120-day highpass filtered zonal wind stress from TropFlux observations
+and earth system models (ESMs). The WWE characteristics include
+central longitude and date, zonal extent, duration, and integrated
+wind work (IWW), which is the sum of all zonal wind stress within the
+WWE time-longitude patch. The IWW indicates the WWE's total forcing to
+the ocean. The methods for WWE identification are fully discussed in
+Riley Dellaripa et al. (2024).
+

+The results of the WWE identification are plotted below in
+time-longitude space (i.e., Hovmollers). The shading is the anomalous
+zonal wind stress and the black outlines indicate the WWEs. Each
+Hovmoller represents one year in the observations or ESM.
+

+Also plotted is a 1D histogram of the likelihood of each 1 longitude
+bin across the Pacific experiencing a WWE per day. The likelihood is
+calculated as the total number of unique events at each 1 longitude
+divided by the total number of days in each data set. The reciprocal
+of the frequency indicates the average return rate of a WWE per
+longitude or the average number of days between WWEs at each
+longitude.
+

+A complementary POD assesses the fidelity of oceanic +Kelvin waves in ESMs. +

+ +

Reference:

+Riley Dellaripa, E. M., C. A. DeMott, J. Cui, and E. D. Maloney, 2024: +Evaluation of Equatorial Westerly Wind Events in the Pacific Ocean in +CMIP6 Models. J. Climate, https://doi.org/10.1175/JCLI-D-23-0629.1 +

+

{{CASENAME}}

+

Hovmollers of zonal wind stress and WWEs:

+ + + + + +
Hovmollers of first 20 years: +
{{CASENAME}} + TropFlux 1980-1999 +
+

+ + + + + +
Hovmollers beyond year 20: +
{{CASENAME}} + TropFlux 2000-2014 +
+ +

+ +

WWE probability per longitude in TropFlux and {{CASENAME}}

+plot + diff --git a/diagnostics/WWEs/WWEs.py b/diagnostics/WWEs/WWEs.py new file mode 100644 index 000000000..8e7d85c95 --- /dev/null +++ b/diagnostics/WWEs/WWEs.py @@ -0,0 +1,433 @@ +import matplotlib +matplotlib.use("Agg") # non-X windows backend + +import matplotlib.pyplot as plt +import numpy as np +import xarray as xr +import os +import time +import xesmf as xe +import scipy +from scipy import stats +from functools import partial +import intake +import sys +import yaml + +from WWE_diag_tools import ( + land_mask_using_etopo, + regridder_model2obs, + nharm, + filter_data, + isolate_WWEs, + WWE_characteristics, + find_WWE_time_lon, + plot_model_Hovmollers_by_year, + events_per_lon, + plot_WWE_likelihood_per_lon) + +#################################################################################### +#Define some paths and functions +#################################################################################### +def find_WWEs_and_characteristics(in_data = None, tauu_thresh = 0.04, mintime = 5, minlons = 10, + xminmax = (3, 3), yminmax = (3, 3), minmax_dur_bins = (5, 27), + dur_bin_space = 2, minmax_IWW_bins = (1, 42), IWW_bin_space = 4, + xtend_past_lon = 140): + ''' + This function call the following functions within WWE_diag_tools.py + - isolate_WWEs + - find_nearby_wwes_merge + - renumber_wwes + - WWE_chracteristics + - find_WWE_time_lon + ''' + + start_time = time.time() + + # 1) Find WWEs + #The isolate_WWEs function uses the find_nearby_wwes_merge, renumber_wwes functions + WWE_labels, WWE_mask = isolate_WWEs(data = in_data, tauu_thresh = tauu_thresh, mintime = mintime, + minlons = minlons, xmin = xminmax[0], xmax = xminmax[1], + ymin = yminmax[0], ymax = yminmax[1], xtend_past_lon = xtend_past_lon) + + # 2) Find characteristics (i.e., duration, zonal extent, integrated wind work sum and mean) of each WWE + #Uses WWE_characteristics function + duration, zonal_extent, IWW, tauu_mean = WWE_characteristics(wwe_labels = WWE_labels, data = in_data) + + # 
3) Find central, min, and max time and longitude of each WWE + #Uses find_WWE_time_lon function + tauu_time = in_data["time"] + tauu_lon = in_data["lon"] + lon_array = np.asarray(tauu_lon) + + center_lons, center_times, min_times, max_times, min_lons, max_lons \ + = find_WWE_time_lon(data = in_data, wwe_labels = WWE_labels, + lon = lon_array, time_array = tauu_time) + + print("--- %s seconds to ID WWEs and compute characteristics---" % (time.time() - start_time)) + + return duration, IWW, zonal_extent, tauu_mean, WWE_labels, WWE_mask, center_lons, \ + center_times, min_times, max_times, min_lons, max_lons, \ + lon_array, tauu_time + + +def save_filtered_tauu_WWEchar(WWE_labels = None, WWE_mask = None, tauu_anom_vals = None, duration = None, IWW = None, + zonal_extent = None,tauu_anom_mean = None, tauu_abs_mean = None, + center_lon_vals = None, center_time_vals = None, min_lon_vals = None, + max_lon_vals = None, min_time_vals = None, max_time_vals = None, + lon_array = None, tauu_time = None, save_name = '', + filt_descrip = '120-day HP filtered'): + + uniq_WWE_labels = np.unique(WWE_labels)[1:] + #reference_time = pd.Timestamp('1970-01-01T00:00:00Z') + + data_vars = dict( + wwe_labels =(['time', 'lon'], WWE_labels.squeeze(), dict(units='None', long_name='Unique label for each WWE')), + wwe_mask =(['time', 'lon'], WWE_mask.squeeze(), dict(units='Binary', long_name='1s are where WWEs are located, 0s are locations without WWEs')), + tauu_anom =(['time', 'lon'], tauu_anom_vals.squeeze(), dict(units='Pa', long_name = 'Mean ' + filt_descrip + ' zonal wind stress')), + tauu_anom_mean=(['events'], tauu_anom_mean, dict(units='Pa', long_name = 'Mean ' + filt_descrip + ' zonal wind stress per WWE')), + tauu_abs_mean =(['events'], tauu_abs_mean, dict(units='Pa', long_name = 'Mean absolute zonal wind stress per WWE')), + duration =(['events'], duration, dict(units='Days', long_name = 'duration of each WWE')), + IWW_vals =(['events'], IWW, dict(units='Pa', long_name='Integrated 
wind work for each WWE')), + zonal_extent =(['events'], zonal_extent, dict(units='Degrees', long_name = 'Longitudinal extent of each WWE')), + center_lons =(['events'], center_lon_vals, dict(units='Degrees', long_name = 'Mass-weighted center longitude for each WWE')), + min_lons =(['events'], min_lon_vals, dict(units='Degrees', long_name = 'Min longitude for each WWE')), + max_lons =(['events'], max_lon_vals, dict(units='Degrees', long_name = 'Max longitude for each WWE')), + min_times =(['events'], min_time_vals), + max_times =(['events'], max_time_vals), + center_times =(['events'], center_time_vals) + ) + + ds = xr.Dataset(data_vars = data_vars, + coords=dict( + events= (["events"], uniq_WWE_labels), + lon = lon_array, + time = tauu_time, + ), + attrs=dict(description= filt_descrip + " zonal wind stress and WWE characteristics. Generated using MDTF POD WWE diagnostic") + ) + + ds.to_netcdf(save_name + '.WWE_characteristics.nc') + + return ds + +def _preprocess(x, lon_bnds, lat_bnds): + return x.sel(lon=slice(*lon_bnds), lat=slice(*lat_bnds)) + + +print("\n=======================================") +print("BEGIN WWEs.py ") +print("=======================================") + +print("*** Parse MDTF-set environment variables ...") +work_dir = os.environ["WORK_DIR"] +obs_dir = os.environ["OBS_DATA"] +casename = os.environ["CASENAME"] +start_date = os.environ["startdate"] +end_date = os.environ["enddate"] +first_year = os.environ["first_yr"] +last_year = os.environ["last_yr"] +static_thresh= os.environ['do_static_threshold'] +min_lat = float(os.environ["min_lat"]) +max_lat = float(os.environ["max_lat"]) +min_lon = float(os.environ["min_lon"]) +max_lon = float(os.environ["max_lon"]) +regrid_method= os.environ["regrid_method"] + +#Define lats to average tauu over and lon range to analyze +lat_lim_list = [min_lat, max_lat] +lon_lim_list = [min_lon, max_lon] + +########################################################################### +########### Part 1 
######################################################## +########### Get TropFlux observations needed for regridding and plotting +########################################################################### +print(f'*** Now working on obs data\n------------------------------') +obs_file_WWEs = obs_dir + '/TropFlux_120-dayHPfiltered_tauu_1980-2014.nc' + +print(f'*** Reading obs data from {obs_file_WWEs}') +obs_WWEs = xr.open_dataset(obs_file_WWEs) +print(obs_WWEs) + +# Subset the data for the user defined first and last years # +obs_WWEs = obs_WWEs.sel(time=slice(first_year, last_year)) + +obs_lons = obs_WWEs.lon +obs_lats = obs_WWEs.lat +obs_time = obs_WWEs.time +Pac_lons = obs_WWEs.Pac_lon +obs_WWE_mask = obs_WWEs.WWE_mask +TropFlux_filt_tauu = obs_WWEs.filtered_tauu +TropFlux_WWEsperlon = obs_WWEs.WWEs_per_lon + +################################################################################### +######### PART 2 ################################################################## +######### Prepare Model output for WWE ID code##################################### +################################################################################### +print(f'*** Now starting work on {casename}\n------------------------------') +print('*** Reading variables ...') + +#These variables come from the case_env_file that the framework creates +#the case_env_file points to the csv file, which in turn points to the data files. +#Variables from the data files are then read in. 
See example_multicase.py +# Read the input model data + +case_env_file = os.environ["case_env_file"] +assert os.path.isfile(case_env_file), f"case environment file not found" +with open(case_env_file, 'r') as stream: + try: + case_info = yaml.safe_load(stream) + except yaml.YAMLError as exc: + print(exc) + +cat_def_file = case_info['CATALOG_FILE'] +case_list = case_info['CASE_LIST'] + +#Use partial function to only load part of the data file +lon_bnds, lat_bnds = (0, 360), (-32.5, 32.5) +partial_func = partial(_preprocess, lon_bnds=lon_bnds, lat_bnds=lat_bnds) + +# open the csv file using information provided by the catalog definition file +cat = intake.open_esm_datastore(cat_def_file) + +# all cases share variable names and dimension coords in this example, so just get first result for each +tauu_var = [case['tauu_var'] for case in case_list.values()][0] +time_coord = [case['time_coord'] for case in case_list.values()][0] +lat_coord = [case['lat_coord'] for case in case_list.values()][0] +lon_coord = [case['lon_coord'] for case in case_list.values()][0] + +############################################################################ +#Filter catalog by desired variable and output frequency +############################################################################ +#Get tauu (zonal wind stress) variable +tauu_subset = cat.search(variable_id=tauu_var, frequency="day") + +# convert tauu_subset catalog to an xarray dataset dict +tauu_dict = tauu_subset.to_dataset_dict(preprocess = partial_func, + xarray_open_kwargs={"decode_times": True, "use_cftime": True}) + +for k, v in tauu_dict.items(): + tauu_arr = tauu_dict[k][tauu_var] + +################################################################## +#Get sftlf (land fraction) variable if it exists & mask out land +################################################################## +key = 'sftlf_var' +x = list(case_list[casename].keys()) + +if(x.count(key) == 1): + print("Using model land fraction variable to mask out 
land") + sftlf_var = [case['sftlf_var'] for case in case_list.values()][0] + sftlf_subset = cat.search(variable_id=sftlf_var, frequency="fx") + # convert sftlf_subset catalog to an xarray dataset dict + sftlf_dict = sftlf_subset.to_dataset_dict(preprocess = partial_func) + + for k, v in sftlf_dict.items(): + sftlf_arr = sftlf_dict[k][sftlf_var] + + #mask out land in tauu + masked_tauu = tauu_arr.where(sftlf_arr < 10) + +if(x.count(key) == 0): + print("Need to use etopo.nc file to mask out the land") + print('Program will exit for now, as need to build in more code') + #ls_mask = land_mask_using_etopo(ds = model_ds, topo_latgrid_1D = topo_latgrid_1D, + # topo_longrid_1D = topo_longrid_1D, + # topo_data1D = topo_data1D, lf_cutoff = 10) + #masked_tauu = model_ds[tauu_name].where(ls_mask == 1) + sys.exit() + +if(x.count(key) > 1): + print('Error: Multiple land fraction (sftlf) files found. There should only be one!') + print('Program will exit') + sys.exit() + +################################################## +#Convert masked_tauu dataaray back to dataset +tauu_ds = masked_tauu.to_dataset() + +################################################## +#Only keep data during desired time range +tauu_ds = tauu_ds.where((tauu_ds.time.dt.year >= int(first_year)) & + (tauu_ds.time.dt.year <= int(last_year)), drop = True) + +################################################## +#Create a mask variable for the tauu dataset +tauu_ds["mask"] = xr.where(~np.isnan(tauu_ds[tauu_var].isel(time = 0)), 1, 0) + +################################################## +#Regrid tauu to the TropFlux obs grid +################################################## +print('lon size before regridding:', tauu_ds.lon.size) +print('Start regrid code using the following method:', regrid_method) + +if tauu_ds.lat.size > 1: + print('tauu_ds.lat.size > 1') + regridder_tauu = regridder_model2obs(lon_vals = np.asarray(obs_lons), lat_vals = np.asarray(obs_lats), + in_data = tauu_ds, type_name = regrid_method, + 
isperiodic = True) + re_model_tauu = regridder_tauu(tauu_ds[tauu_var], skipna = True) + +print('lon size after regridding:', re_model_tauu.lon.size) + +################################################## +#Find region of interest +#At this point, re_model_tauu is a DataArray +################################################## +tauu_region = ((re_model_tauu).where( + (re_model_tauu.lat >= np.array(lat_lim_list).min()) & + (re_model_tauu.lat <= np.array(lat_lim_list).max()) & + (re_model_tauu.lon >= np.array(lon_lim_list).min()) & + (re_model_tauu.lon <= np.array(lon_lim_list).max()), + drop = True)) + +print('tauu_region:', tauu_region) + +################################################## +#Average over the latitudes +################################################## +#The xarray mean function ignores the nans +tauu_region_latavg = tauu_region.mean(dim = 'lat') + +################################################################################### +#Check to see if westerly zonal wind stresses are recorded as positive or negative +################################################################################### +mean_lon220p5 = np.array(np.mean(tauu_region_latavg.sel(lon = 220.5))) +print('mean tauu at 220.5E:', mean_lon220p5) +factor = -1 if mean_lon220p5 > 0 else 1 +tauu = tauu_region_latavg * factor + +#Control the chunk size because the chunk size when computing data2use below goes to (1, 1) +#using the latest MDTF python 3.12, which then takes forever to run +tauu = tauu.chunk({"time": tauu["time"].size}) + +print('tauu after lat averaging:', tauu) +print('At this point, tauu is a DataArray with time longitude dimensions on the TropFlux grid') + +################################################################################### +#Filter tauu to use as input to find WWEs and their chracteristics +################################################################################### +#filt_dataLP = filter_data(data = tauu, nweights = 201, a = 5) +#For now the 
only option is to apply a 120-day highpass filter +filt_dataHP = filter_data(data = tauu, nweights = 201, a = 120) + +#As above, control the chunk size +filt_dataHP = filt_dataHP.chunk({"time": tauu["time"].size}) + +data2use = tauu - filt_dataHP +obs_tauu_thresh = 0.04 #Nm-2 Two standard deviations of the TropFlux lat-averaged 120E-280E zonal wind stress. +tauu_thresh2use = obs_tauu_thresh if static_thresh is True else np.round(data2use.std()*2, decimals = 2) + +print('tauu_thresh2use:', tauu_thresh2use) +print('data2use', data2use) + +################################################################################### +######### PART 3 ################################################################## +######### Find & Save WWEs and their characteristics & statistics ################# +################################################################################### +duration, IWW, zonal_extent, tauu_mean, WWE_labels, WWE_mask, center_lons, \ +center_times, min_times, max_times, min_lons, max_lons, lon_array, tauu_time = \ +find_WWEs_and_characteristics(in_data = data2use, tauu_thresh = tauu_thresh2use, mintime = 5, minlons = 10, + xminmax = (3, 3), yminmax = (3, 3), minmax_dur_bins = (5, 27), + dur_bin_space = 2, minmax_IWW_bins = (1, 42), IWW_bin_space = 4, + xtend_past_lon = 140) + +durationB, zonal_extentB, tauu_sum, tauu_abs_mean = WWE_characteristics(wwe_labels = WWE_labels, data = tauu) + + +print('nWWEs:', duration.size) +print('') + +################################################################################## +# Save the WWE characteristics and statistics to a netcdf file +################################################################################## +print('Saving the WWE characteristics to a netcdf file') + +save_name = f"{work_dir}/model/netCDF/{casename}.{first_year}-{last_year}" + +WWE_chars = save_filtered_tauu_WWEchar(WWE_labels = WWE_labels, WWE_mask = WWE_mask, tauu_anom_vals = np.asarray(data2use), + duration = duration, IWW = IWW, 
zonal_extent = zonal_extent, + tauu_anom_mean = tauu_mean, tauu_abs_mean = tauu_abs_mean, + center_lon_vals = center_lons, center_time_vals = np.asarray(center_times), + min_lon_vals = min_lons, max_lon_vals = max_lons, + min_time_vals = np.asarray(min_times), max_time_vals = np.asarray(max_times), + lon_array = lon_array, tauu_time = np.asarray(tauu_time), save_name = save_name) + +################################################################################### +######### PART 4 ################################################################## +######### Plot Hovmollers and histograms for observations and model ############### +################################################################################### +#Plot the yearly Hovmollers for observations +plot_model_Hovmollers_by_year(data = TropFlux_filt_tauu, wwe_mask = obs_WWE_mask, + lon_vals = Pac_lons, tauu_time = obs_time, + savename = f"{work_dir}/obs/PS/TropFlux", + start_date = '1980-1999', end_date = '2000-2014') + +########################################################################### +# Plot Homollers for Model +########################################################################### +plot_model_Hovmollers_by_year(data = data2use, wwe_mask = WWE_mask, + lon_vals = lon_array, tauu_time = tauu_time, + savename = f"{work_dir}/model/PS/{casename}", + start_date = start_date, end_date = end_date) + + +########################################################################### +# Plot the likelihood of a WWE affecting a given 1degree longitude +########################################################################### +#Convert WWE_lavels from a numpy array to a DataArray +WWE_labels_da = xr.DataArray(data=WWE_labels, dims = ['time', 'lon'], + coords=dict( + lon=(['lon'], lon_array), + time=tauu_time,), + attrs=dict(description="WWE labels", units = 'N/A',) + ) + + +#Call function events_per_lon in WWE_diag_tools.py, which calculates the +#number of unique WWEs affecting a given 1degree 
longitude bin (i.e., count_all_event_lons) +count_all_event_lons, nall_events = events_per_lon(in_da = WWE_labels_da) + + +#Convert count_all_even_lons into a probability per day +obs_prop_per_day = TropFlux_WWEsperlon/obs_time.size*100. +model_prop_per_day = count_all_event_lons/tauu_time.size*100. + +#Save the count and probability of a unique WWE per 1degree longitude bin +data_vars = dict( + model_WWEs_per_lon =(['lon'], count_all_event_lons, dict(units='count', long_name='Number of unique WWEs affecting a 1degree lon bin from the model')), + model_freq_WWEs_per_lon =(['lon'], model_prop_per_day, dict(units='fractuion', long_name='The fraction of unique WWEs affecting a 1degree lon bin from the model calculated as count/ndays ')), + obs_WWEs_per_lon =(['lon'], np.asarray(TropFlux_WWEsperlon), dict(units='count', long_name='Number of unique WWEs affecting a 1degree lon bin in TropFlux observations from 1980-2014')), + obs_freq_WWEs_per_lon =(['lon'], np.asarray(obs_prop_per_day), dict(units='fractuion', long_name='The fraction of unique WWEs affecting a 1degree lon bin from TropFlux calculated as count/ndays ')) +) + +ds = xr.Dataset(data_vars = data_vars, + coords=dict(lon = lon_array,), + attrs=dict(description= "Variables needed to make the *model*_and_TropFlux_WWE_prob_per_day.png figures that the WWEs POD produces") + ) + +ds.to_netcdf(f"{work_dir}/model/netCDF/{casename}_and_TropFlux_WWE_probability_per_day.nc") + + +# ***** Make the plot ****** +save_path = f"{work_dir}/model/PS/" +model_titlename = {casename} + +plot_WWE_likelihood_per_lon(lons = Pac_lons, model_prop_per_day = model_prop_per_day, + obs_prop_per_day = obs_prop_per_day, savepath = save_path, + model_name = casename) +################################################################################### +######### PART 5 ################################################################## +#Close the catalog files and release variable dict reference for garbage collection 
+################################################################################### +cat.close() +tauu_dict = None + +################################################################################### +######### PART 6 ################################################################## +######### Confirm POD executed successfully ####################################### +################################################################################### +# ---------------------------------------- +print("Last log message by example_multicase POD: finished successfully!") +sys.exit(0) + diff --git a/diagnostics/WWEs/WWEs_run_config.yml b/diagnostics/WWEs/WWEs_run_config.yml new file mode 100644 index 000000000..a5c45b96b --- /dev/null +++ b/diagnostics/WWEs/WWEs_run_config.yml @@ -0,0 +1,76 @@ +# Runtime yaml configuration file template for the MDTF-diagnostics package +# Create a copy of this file for personal use, and pass it to the framework +# with the -f | --configfile flag + +# List of POD(s) to run +pod_list: + - "WWEs" + +# Case list entries (must be unique IDs for each simulation) +#case_list: +# "INM-CM4-8_historical_r1i1p1f1_gr1" : +# model: "INM" +# convention: "CMIP" +# startdate: "19800101" +# enddate: "20141231" + +case_list: + "CESM2_historical_r1i1p1f1_gn" : + model: "CESM2" + convention: "CMIP" + startdate: "19800101" + enddate: "20150101" + +# "tauu_Eday_CESM2_historical_r1i1p1f1_gn_19900101-19991231" : +# model: "CESM2" +# convention: "CMIP" +# startdate: "19900101000000" +# enddate: "19991231000000" + +### Data location settings ### +# Required: full or relative path to ESM-intake catalog header file +DATA_CATALOG: "./diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.json" +# Optional: Parent directory containing observational data used by individual PODs. +# If defined, the framework assumes observational data is in OBS_DATA_ROOT/[POD_NAME] +OBS_DATA_ROOT: "../inputdata/obs_data" +# Required: Working directory location. 
Temporary files are written here. +# Final output is also written here if the OUTPUT_DIR is not defined. +WORK_DIR: "../wkdir" +# Optional: Location to write final output if you don't want it in the wkdir +OUTPUT_DIR: "../wkdir" +### Environment Settings ### +# Required: Location of the Anaconda/miniconda installation to use for managing +# dependencies (path returned by running `conda info --base`.) +conda_root: "/Users/eriley/anaconda3" +# Optional: Directory containing the framework-specific conda environments. This should +# be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left +# blank, the framework will look for its environments in conda_root/envs +conda_env_root: "/Users/eriley/anaconda3/envs" +# Location of micromamba executable; REQUIRED if using micromamba +micromamba_exe: "" +### Data type settings ### +# set to true to handle data files > 4 GB +large_file: False +### Output Settings ### +# Set to true to have PODs save postscript figures in addition to bitmaps. +save_ps: False +# If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs +# run to completion +save_pp_data: True +# Set to true to perform data translation; default is True: +translate_data: True +# Set to true to save HTML and bitmap plots in a .tar file. +make_variab_tar: False +# Set to true to overwrite results in OUTPUT_DIR; otherwise results saved +# under a unique name. 
+overwrite: False +# Generate html output for multiple figures per case +"make_multicase_figure_html": False +### Developer settings ### +# Set to True to run the preprocessor +run_pp: False +# Additional custom preprocessing scripts to run on data +# place these scripts in the MDTF-diagnostics/user_scripts directory +# The framework will run the specified scripts whether run_pp is set to True or False +user_pp_scripts: + - "" diff --git a/diagnostics/WWEs/doc/WWE.rst b/diagnostics/WWEs/doc/WWE.rst new file mode 100644 index 000000000..e149b0bfc --- /dev/null +++ b/diagnostics/WWEs/doc/WWE.rst @@ -0,0 +1,137 @@ +.. This is a comment in RestructuredText format (two periods and a space). +Westerly Wind Event Diagnostic Documentation +============================================ + +Last update: 2024-09-19 + +This POD identifies westerly wind events (WWEs) in the equatorial Pacific +(120E-280E) using daily equatorially-averaged (2.5S-2.5N) 120-day +highpass filtered zonal wind stress. WWEs in TropFlux observations are +compared to WWEs produced by earth system models. Two sets of figures +are created: + +1. Hovmollers of 120-day highpass filtered zonal wind stress for each + year of the data considered. Each year is a separate panel with the + first 20 years occurring on one output figure and the next 20 years + on the second output figure. Time-longitude zonal wind stress + patches that qualify as WWEs are outlined in black. The Hovmollers + are created for both the TropFlux observations and the ESM. + +2. A 1D histogram of the likelihood of each 1 longitude +bin across the Pacific experiencing a WWE per day. The likelihood is +calculated as the total number of unique events at each 1 longitude +divided by the total number of days in each data set. The reciprocal +of the frequency indicates the average return rate of a WWE per +longitude or the average number of days between WWEs at each +longitude. 
+ +The variables needed to create these figures are saved in the +following directories: +{work_dir}/WWEs/model/netCDF/ and /inputdata/obs_data/WWEs/ + +Version & Contact info +---------------------- +- Version/revision information: v1.0 (September 2024) +- PI: Emily M. Riley Dellaripa (Emily.Riley@colostate.edu), Colorado State University +- Current Developer: Emily M. Riley Dellaripa (Emily.Riley@colostate.edu), Colorado State University +- Contributors: Charlotte A. DeMott (CSU); Eric D. Maloney (CSU); + Jingxuan Cui (CSU) + +Open source copyright agreement +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The MDTF framework is distributed under the LGPLv3 license (see LICENSE.txt). + +Functionality +------------- + +This POD is composed of two files. The main driver script is +``WWEs.py``, which contains the code and functions which perform the +diagnostic computations and call the plotting code. The plotting code +and additional functions needed to identify the WWEs and determine +their characteristics are contained in ``WWE_diag_tools.py``. The main +driver script reads in the necessary data, prepares the zonal wind +stress for identifying WWEs, calls the function that identifies the +WWEs and their characteristics, saves information to netcdf files +including: the WWE labels and mask and their characteristics (i.e., +central time and longitude, zonal extent, duration, and integrated +wind work (i.e., total forcing of zonal wind stress to the ocean)), +and finally calls the functions that make the plots. + +The observational data used is TropFlux daily zonal surface fluxes +that are available from 1980-2014 (Praveen Kumar et al. 2013). + +The POD is based on Riley Dellaripa et al. (2024) and details of the +methods, motivation, and results of our work can be found there. 
+ +By default the WWEs in the model output are identified using a 0.04 N/m2 +threshold, which reflects two standard deviations of the +equatorially-averaged 120-day highpass filtered TropFlux zonal wind +stress between 1980-2014. Users can adjust the settings.jsonc file to +not use the 0.04 N/m2 threshold, but instead calculate the models two +standard deviations of its equatorially-averaged 120-day highpass +filtered zonal wind stress and use that as the threshold for +identifying the model's WWEs. + +Required programming language and libraries +------------------------------------------- + +* Python >= 3.12 +* matplotlib +* xarray +* numpy +* os +* time +* xesmf +* scipy +* functools +* intake +* yaml +* sys + +These dependencies are included in the python3_base environment +provided by the automated installation script +for the MDTF Framework. + +Required model output variables +------------------------------- + +This POD requires the following variables: + +* Zonal wind stress (TAUU, 3D (time-lat-lon), Units: Pa, Frequency: Daily +* Percentage of the grid cell occupied by land (sftlf), 2D + (lat-lon), Units: %, Frequency: Fixed + + +References +---------- +1. Riley Dellaripa et al. (2024): Evaluation of Equatorial Westerly + Wind Events in the Pacific Ocean in CMIP6 Models. *J. Climate*, + `doi: 10.1175/JCLI-D-23-0629.1 < https://doi.org/10.1175/JCLI-D-23-0629.1>`__. + +2. Praveen Kumar,B., J. Vialard, M. Lengaigne, V. S. N. Murty, M. J. McPhaden M. F. Cronin, F. Pinsard, + and K. Gopala Reddy, 2013: TropFlux wind stresses over the tropical + oceans: evaluation and comparison with other products. Clim. Dyn., + 40, 2049–2071, `doi: 10.1007/s00382-012-1455-4 < https://doi.org/10.1007/s00382-012-1455-4>`__. + +More about this diagnostic +-------------------------- +Westerly wind events (WWEs) are anomalously strong, long lasting +westerlies that occur primarily over the Pacific Ocean. 
The momentum +imparted on the ocean from WWEs has the potential to excite eastward +propagating oceanic Kelvin waves. The oceanic Kelvin waves, in turn, +warm the ocean surface and depress the thermocline as they propagate +eastward, which can have impacts on ENSO development. Because of WWEs' +relationship to oceanic Kelvin waves and ENSO development, it is +important to determine how well earth system models (ESMs) replicate +them. The inability of a model to accurately replicate WWE frequency, +location, and strength has potential consequences for ENSO +development. WWEs are frequently associated with Madden Julian +Oscillation (MJO) events or equatorial convectively coupled Rossby +waves (CRWs), though MJO and CRWs are not the only source of WWEs. The +inability of a model to appropriately represent WWE forcing can be +linked to deficiencies in a model's representation of MJO and CRW +variability, though there are some models that accurately capture MJO +and CRW variability while still misrepresenting WWE forcing in the +west Pacific. For more details on this work please see Riley Dellaripa +et al. (2024). 
diff --git a/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.csv b/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.csv new file mode 100644 index 000000000..f84762f87 --- /dev/null +++ b/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.csv @@ -0,0 +1,3 @@ +activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version +CMIP,standard,674885,219000,all-forcing simulation of the recent past,historical,day,native 0.9x1.25 finite volume grid (192x288 latxlon),gn,NCAR,100 km,CMIP,piControl,CESM2,days since 0001-01-01 00:00:00,r1i1p1f1,model-output,atmos,CESM2,AOGCM BGC,none,none,Eday,tauu,r1i1p1f1,r1i1p1f1,surface_downward_eastward_stress,Surface Downward Eastward Wind Stress,Pa,1,,1/1/80 0:00,1/1/15 0:00,1980-01-01 00:00:00-2015-01-01 00:00:00,/Users/eriley/NOAA_POD/model_data/CESM2/tauu/tauu_Eday_CESM2_historical_r1i1p1f1_gn_19800101-20150101.nc,v0 +CMIP,standard,674885,219000,all-forcing simulation of the recent past,historical,fx,native 0.9x1.25 finite volume grid (192x288 latxlon),gn,NCAR,100 km,CMIP,piControl,CESM2,days since 0001-01-01 00:00:00,r1i1p1f1,model-output,atmos,CESM2,AOGCM BGC,none,none,fx,sftlf,r1i1p1f1,r1i1p1f1,land_area_fraction,Percentage of the grid cell occupied by land (including lakes),%,1,,,,,/Users/eriley/NOAA_POD/model_data/sftlf_historical/CESM2/sftlf_fx_CESM2_historical_r1i1p1f1_gn.nc,v0 \ No newline at end of file diff --git a/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.json b/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.json new file mode 100644 index 000000000..e37e6c28c --- /dev/null +++ 
b/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.json @@ -0,0 +1,181 @@ +{ + "esmcat_version": "0.0.1", + "attributes": [ + { + "column_name": "activity_id", + "vocabulary": "" + }, + { + "column_name": "branch_method", + "vocabulary": "" + }, + { + "column_name": "branch_time_in_child", + "vocabulary": "" + }, + { + "column_name": "branch_time_in_parent", + "vocabulary": "" + }, + { + "column_name": "experiment", + "vocabulary": "" + }, + { + "column_name": "experiment_id", + "vocabulary": "" + }, + { + "column_name": "frequency", + "vocabulary": "" + }, + { + "column_name": "grid", + "vocabulary": "" + }, + { + "column_name": "grid_label", + "vocabulary": "" + }, + { + "column_name": "institution_id", + "vocabulary": "" + }, + { + "column_name": "nominal_resolution", + "vocabulary": "" + }, + { + "column_name": "parent_activity_id", + "vocabulary": "" + }, + { + "column_name": "parent_experiment_id", + "vocabulary": "" + }, + { + "column_name": "parent_source_id", + "vocabulary": "" + }, + { + "column_name": "parent_time_units", + "vocabulary": "" + }, + { + "column_name": "parent_variant_label", + "vocabulary": "" + }, + { + "column_name": "product", + "vocabulary": "" + }, + { + "column_name": "realm", + "vocabulary": "" + }, + { + "column_name": "source_id", + "vocabulary": "" + }, + { + "column_name": "source_type", + "vocabulary": "" + }, + { + "column_name": "sub_experiment", + "vocabulary": "" + }, + { + "column_name": "sub_experiment_id", + "vocabulary": "" + }, + { + "column_name": "table_id", + "vocabulary": "" + }, + { + "column_name": "variable_id", + "vocabulary": "" + }, + { + "column_name": "variant_label", + "vocabulary": "" + }, + { + "column_name": "member_id", + "vocabulary": "" + }, + { + "column_name": "standard_name", + "vocabulary": "" + }, + { + "column_name": "long_name", + "vocabulary": "" + }, + { + "column_name": "units", + "vocabulary": "" + }, + { + "column_name": "vertical_levels", + "vocabulary": "" + }, + { + 
"column_name": "init_year", + "vocabulary": "" + }, + { + "column_name": "start_time", + "vocabulary": "" + }, + { + "column_name": "end_time", + "vocabulary": "" + }, + { + "column_name": "time_range", + "vocabulary": "" + }, + { + "column_name": "path", + "vocabulary": "" + }, + { + "column_name": "version", + "vocabulary": "" + } + ], + "assets": { + "column_name": "path", + "format": "netcdf", + "format_column_name": null + }, + "aggregation_control": { + "variable_column_name": "variable_id", + "groupby_attrs": [ + "activity_id", + "institution_id", + "source_id", + "experiment_id", + "frequency", + "member_id", + "table_id", + "grid_label", + "realm", + "variant_label" + ], + "aggregations": [ + { + "type": "union", + "attribute_name": "variable_id", + "options": {} + } + ] + }, + "id": "esm_catalog_CESM2_historical_r1i1p1f1", + "description": null, + "title": null, + "last_updated": "2025-03-03T20:03:41Z", + "catalog_file": "file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.csv" +} \ No newline at end of file diff --git a/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.csv b/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.csv new file mode 100644 index 000000000..0b3bf6a98 --- /dev/null +++ b/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.csv @@ -0,0 +1,3 @@ +activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version +CMIP,standard,0,35405,all-forcing simulation of the recent past,historical,day,gs2x1.5,gr1,INM,100 km,CMIP,piControl,INM-CM4-8,days since 
1850-01-01,r1i1p1f1,model-output,atmos,INM-CM4-8,AOGCM AER,none,none,Eday,tauu,r1i1p1f1,r1i1p1f1,surface_downward_eastward_stress,Surface Downward Eastward Wind Stress,Pa,1,,1/1/80 12:00,12/31/14 12:00,1980-01-01 12:00:00-2014-12-31 12:00:00,/Users/eriley/NOAA_POD/model_data/INM-CM4-8/tauu/tauu_Eday_INM-CM4-8_historical_r1i1p1f1_gr1_19800101-20141231.nc,v0 +CMIP,standard,0,35405,all-forcing simulation of the recent past,historical,fx,gs2x1.5,gr1,INM,100 km,CMIP,piControl,INM-CM4-8,days since 1850-01-01,r1i1p1f1,model-output,atmos,INM-CM4-8,AOGCM AER,none,none,fx,sftlf,r1i1p1f1,r1i1p1f1,land_area_fraction,Percentage of the grid cell occupied by land (including lakes),%,1,,,,,/Users/eriley/NOAA_POD/model_data/sftlf_historical/INM-CM4-8/sftlf_fx_INM-CM4-8_historical_r1i1p1f1_gr1.nc,v0 \ No newline at end of file diff --git a/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.json b/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.json new file mode 100644 index 000000000..3a66d8d24 --- /dev/null +++ b/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.json @@ -0,0 +1,181 @@ +{ + "esmcat_version": "0.0.1", + "attributes": [ + { + "column_name": "activity_id", + "vocabulary": "" + }, + { + "column_name": "branch_method", + "vocabulary": "" + }, + { + "column_name": "branch_time_in_child", + "vocabulary": "" + }, + { + "column_name": "branch_time_in_parent", + "vocabulary": "" + }, + { + "column_name": "experiment", + "vocabulary": "" + }, + { + "column_name": "experiment_id", + "vocabulary": "" + }, + { + "column_name": "frequency", + "vocabulary": "" + }, + { + "column_name": "grid", + "vocabulary": "" + }, + { + "column_name": "grid_label", + "vocabulary": "" + }, + { + "column_name": "institution_id", + "vocabulary": "" + }, + { + "column_name": "nominal_resolution", + "vocabulary": "" + }, + { + "column_name": "parent_activity_id", + "vocabulary": "" + }, + { + "column_name": "parent_experiment_id", + "vocabulary": "" + }, + { + 
"column_name": "parent_source_id", + "vocabulary": "" + }, + { + "column_name": "parent_time_units", + "vocabulary": "" + }, + { + "column_name": "parent_variant_label", + "vocabulary": "" + }, + { + "column_name": "product", + "vocabulary": "" + }, + { + "column_name": "realm", + "vocabulary": "" + }, + { + "column_name": "source_id", + "vocabulary": "" + }, + { + "column_name": "source_type", + "vocabulary": "" + }, + { + "column_name": "sub_experiment", + "vocabulary": "" + }, + { + "column_name": "sub_experiment_id", + "vocabulary": "" + }, + { + "column_name": "table_id", + "vocabulary": "" + }, + { + "column_name": "variable_id", + "vocabulary": "" + }, + { + "column_name": "variant_label", + "vocabulary": "" + }, + { + "column_name": "member_id", + "vocabulary": "" + }, + { + "column_name": "standard_name", + "vocabulary": "" + }, + { + "column_name": "long_name", + "vocabulary": "" + }, + { + "column_name": "units", + "vocabulary": "" + }, + { + "column_name": "vertical_levels", + "vocabulary": "" + }, + { + "column_name": "init_year", + "vocabulary": "" + }, + { + "column_name": "start_time", + "vocabulary": "" + }, + { + "column_name": "end_time", + "vocabulary": "" + }, + { + "column_name": "time_range", + "vocabulary": "" + }, + { + "column_name": "path", + "vocabulary": "" + }, + { + "column_name": "version", + "vocabulary": "" + } + ], + "assets": { + "column_name": "path", + "format": "netcdf", + "format_column_name": null + }, + "aggregation_control": { + "variable_column_name": "variable_id", + "groupby_attrs": [ + "activity_id", + "institution_id", + "source_id", + "experiment_id", + "frequency", + "member_id", + "table_id", + "grid_label", + "realm", + "variant_label" + ], + "aggregations": [ + { + "type": "union", + "attribute_name": "variable_id", + "options": {} + } + ] + }, + "id": "esm_catalog_CESM2_historical_r1i1p1f1", + "description": null, + "title": null, + "last_updated": "2025-03-03T18:09:54Z", + "catalog_file": 
"file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.csv" +} diff --git a/diagnostics/WWEs/settings.jsonc b/diagnostics/WWEs/settings.jsonc new file mode 100644 index 000000000..e5bbbcdc4 --- /dev/null +++ b/diagnostics/WWEs/settings.jsonc @@ -0,0 +1,67 @@ +// Basic POD Settings +{ + "settings" : { + "description" : "WWE POD", + "driver" : "WWEs.py", + "long_name" : " WWE POD", + "convention": "cmip", + "runtime_requirements": { + "python3": ["matplotlib", "xarray", "netCDF4", "pandas", "xesmf", "scipy"] + }, + "pod_env_vars": { + "first_yr": "1980", + "last_yr": "2014", + // End of 20 is the last year in the first 20 years analyzed. This will be the first page of Hovmollers created + "endof20": "1999", + // The first year in the second set of 20 years worth of Hovmollers created + "start2nd20": "2000", + // Lower latitude limit for zonal wind stress lat band avgs (defaults to -2.5) + "min_lat": "-2.5", + // Upper latitude limit for zonal wind stress lat band avgs (defaults to 2.5) + "max_lat": "2.5", + // Lower longitude limit for zonal wind stress, lons should be 0-360 (defaults to 120) + "min_lon": "120", + // Upper longitude limit for zonal wind stresx, lons should be 0-360 (defaults to 280) + "max_lon": "280", + // Choose how native model grid should be regridded to match TropFlux grid (default is 'conservative_normed') + // The other option is biliniar + "regrid_method": "conservative_normed", + //How the zonal wind stress threshold to ID WWEs is determine. + //Default True uses the same 2sigma threshold as the obs. 
+ //If False, the model's 2sigma tauu is the threshold + "do_static_threshold": "True" + } + }, +// Variable Coordinates + "dimensions": { + "lat": { + "standard_name": "latitude", + "units": "degrees_north", + "axis": "Y" + }, + "lon": { + "standard_name": "longitude", + "units": "degrees_east", + "axis": "X" + }, + "time": {"standard_name": "time"} + }, + +// Variables + "varlist" : { + "tauu": { + "frequency" : "day", + "dimensions": ["time", "lat", "lon"], + "standard_name" : "surface_downward_eastward_stress", + "realm": "atmos", + "units": "Pa" + }, + "sftlf": { + // "frequency": "fx", + "dimensions": ["lat", "lon"], + "standard_name": "land_area_fraction", + "realm": "land", + "units": "%" + } + } +} \ No newline at end of file diff --git a/diagnostics/example_multicase/ERD_multirun_config_template.jsonc b/diagnostics/example_multicase/ERD_multirun_config_template.jsonc new file mode 100644 index 000000000..a5362c26b --- /dev/null +++ b/diagnostics/example_multicase/ERD_multirun_config_template.jsonc @@ -0,0 +1,114 @@ +// This a template for configuring MDTF to run PODs that analyze multi-run/ensemble data +// +// Copy this file, rename it, and customize the settings as needed +// Pass your file to the framework using the -f/--input-file flag. +// Any other explicit command line options will override what's listed here. +// +// All text to the right of an unquoted "//" is a comment and ignored, as well +// as blank lines (JSONC quasi-standard.) +// +// Remove your test config file, or any changes you make to this template if you do not rename it, +// from your remote repository before you submit a PR for review. 
+// To generate CMIP synthetic data in the example dataset, run the following: +// > mamba env create --force -q -f ./src/conda/_env_synthetic_data.yml +// > conda activate _MDTF_synthetic_data +// > pip install mdtf-test-data +// > cd /mdtf +// > mkdir mdtf_test_data && cd mdtf_test_data +// > mdtf_synthetic.py -c CMIP --startyear 1980 --nyears 5 +// > mdtf_synthetic.py -c CMIP --startyear 1985 --nyears 5 +// Note that MODEL_DATA_ROOT assumes that mdtf_test_data is one directory above MDTF-diagnostics +// in this sample config file +{ + // Run each ensemble on the example POD. + // Add other PODs that work on ensemble datasets to the pod_list as needed + "pod_list" : [ + "example_multicase" + ], + // Each case corresponds to a different simulation/output dataset + "case_list": + { + "CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231": + { + "model": "test", + "convention": "CMIP", + "startdate": "19800101", + "enddate": "19841231" + }, + "CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231": + { + "model": "test", + "convention": "CMIP", + "startdate": "19850101", + "enddate": "19891231" + } + }, + + // PATHS --------------------------------------------------------------------- + // Location of supporting data downloaded when the framework was installed. + // If a relative path is given, it's resolved relative to the MDTF-diagnostics + // code directory. Environment variables (eg, $HOME) can be referenced with a + // "$" and will be expended to their current values when the framework runs. + // Full or relative path to model data ESM-intake catalog header file + "DATA_CATALOG": "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json", + + // Backwards compatibility + //"MODEL_DATA_ROOT": "./inputdata/mdtf_test_data", + + // Parent directory containing observational data used by individual PODs. + "OBS_DATA_ROOT": "../inputdata/obs_data", + + // Working directory. + "WORK_DIR": "../wkdir", + + // Directory to write output. 
The results of each run of the framework will be
+  // put in a subdirectory of this directory. Defaults to WORK_DIR if blank.
+  "OUTPUT_DIR": "../wkdir",
+
+  // Location of the Anaconda/miniconda or micromamba installation to use for managing
+  // dependencies (path returned by running `[conda | micromamba] info`.) If empty,
+  // framework will attempt to determine location of system's conda installation.
+  "conda_root": "/Users/eriley/anaconda3",
+
+  // Directory containing the framework-specific conda environments. This should
+  // be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left
+  // blank, the framework will look for its environments in conda_root/envs
+  "conda_env_root": "/Users/eriley/anaconda3/envs",
+
+  // Path to micromamba executable if using micromamba
+  "micromamba_exe": "",
+
+  // SETTINGS ------------------------------------------------------------------
+  // Any command-line option recognized by the mdtf script (type `mdtf --help`)
+  // can be set here, in the form "flag name": "desired setting".
+
+  // Settings affecting what output is generated:
+  // Set to true to run the preprocessor; default true:
+  "run_pp": false,
+
+  // Set to true to perform data translation; default false:
+  "translate_data": true,
+
+  // Set to true to have PODs save postscript figures in addition to bitmaps.
+  "save_ps": false,
+
+  // Set to true for files > 4 GB
+  "large_file": false,
+
+  // If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs
+  // run to completion
+  "save_pp_data": true,
+
+  // Set to true to save HTML and bitmap plots in a .tar file.
+  "make_variab_tar": false,
+
+  // Generate html output for multiple figures per case
+  "make_multicase_figure_html": false,
+
+  // Set to true to overwrite results in OUTPUT_DIR; otherwise results saved
+  // under a unique name.
+ "overwrite": false, + // List with custom preprocessing script(s) to run on data + // Place these scripts in the user_scripts directory of your copy of the MDTF-diagnostics repository + "user_pp_scripts" : ["example_pp_script.py"] +} diff --git a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv index 68776a919..42ec4e46e 100644 --- a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv +++ b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv @@ -1,3 +1,3 @@ -activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version -CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none -CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none 
+activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version +CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/eriley/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none +CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/eriley/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none \ No newline at end of file diff --git a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json index c62245645..7035c2d6d 100644 --- a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json +++ b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json @@ -177,5 +177,5 @@ "description": null, "title": null, "last_updated": "2023-06-01", - "catalog_file": "file:/Users/jess/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv" -} \ No newline at end of file + "catalog_file": "file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv" +} diff --git 
a/diagnostics/example_multicase/runtime_config.yml b/diagnostics/example_multicase/runtime_config.yml new file mode 100755 index 000000000..0ea49986f --- /dev/null +++ b/diagnostics/example_multicase/runtime_config.yml @@ -0,0 +1,69 @@ +# Runtime yaml configuration file template for the MDTF-diagnostics package +# Create a copy of this file for personal use, and pass it to the framework +# with the -f | --configfile flag + +# List of POD(s) to run +pod_list: + - "example_multicase" + +# Case list entries (must be unique IDs for each simulation) +case_list: + "CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231" : + model: "test" + convention: "CMIP" + startdate: "19800101120000" + enddate: "19841231000000" + +# "CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231" : +# model: "test" +# convention: "CMIP" +# startdate: "19850101000000" +# enddate: "19891231000000" + +### Data location settings ### +# Required: full or relative path to ESM-intake catalog header file +DATA_CATALOG: "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json" +# Optional: Parent directory containing observational data used by individual PODs. +# If defined, the framework assumes observational data is in OBS_DATA_ROOT/[POD_NAME] +OBS_DATA_ROOT: "../inputdata/obs_data" +# Required: Working directory location. Temporary files are written here. +# Final output is also written here if the OUTPUT_DIR is not defined. +WORK_DIR: "../wkdir" +# Optional: Location to write final output if you don't want it in the wkdir +OUTPUT_DIR: "../wkdir" +### Environment Settings ### +# Required: Location of the Anaconda/miniconda installation to use for managing +# dependencies (path returned by running `conda info --base`.) +conda_root: "/Users/eriley/anaconda3" +# Optional: Directory containing the framework-specific conda environments. This should +# be equal to the "--env_dir" flag passed to conda_env_setup.sh. 
If left +# blank, the framework will look for its environments in conda_root/envs +conda_env_root: "../conda_envs" +# Location of micromamba executable; REQUIRED if using micromamba +micromamba_exe: "" +### Data type settings ### +# set to true to handle data files > 4 GB +large_file: False +### Output Settings ### +# Set to true to have PODs save postscript figures in addition to bitmaps. +save_ps: False +# If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs +# run to completion +save_pp_data: True +# Set to true to perform data translation; default is True: +translate_data: True +# Set to true to save HTML and bitmap plots in a .tar file. +make_variab_tar: False +# Set to true to overwrite results in OUTPUT_DIR; otherwise results saved +# under a unique name. +overwrite: False +# Generate html output for multiple figures per case +"make_multicase_figure_html": False +### Developer settings ### +# Set to True to run the preprocessor +run_pp: True +# Additional custom preprocessing scripts to run on data +# place these scripts in the MDTF-diagnostics/user_scripts directory +# The framework will run the specified scripts whether run_pp is set to True or False +user_pp_scripts: + - "" diff --git a/src/conda/env_dev.yml b/src/conda/env_dev.yml index 367390b96..b5883caee 100644 --- a/src/conda/env_dev.yml +++ b/src/conda/env_dev.yml @@ -41,7 +41,7 @@ dependencies: - cloud_sptheme - snakeviz=2.2.0 - graphviz=2.50.0 -- pip=24.3.1 -- pip: - - gprof2dot==2024.6.6 - - viztracer==1.0.0 +#- pip=24.3.1 +#- pip: +# - gprof2dot==2024.6.6 +# - viztracer==1.0.0 diff --git a/src/conda/env_python3_base.yml b/src/conda/env_python3_base.yml index 69136076a..96e416d8c 100644 --- a/src/conda/env_python3_base.yml +++ b/src/conda/env_python3_base.yml @@ -40,16 +40,15 @@ dependencies: - bottleneck=1.3.8 - gfortran=11.3.0 - momlevel=1.0.1 -- pip=23.3.1 -- pip: - - falwa==2.0.0 - - cmocean - - regionmask - - momgrid==1.9.0 - - 
git+https://github.com/jkrasting/doralite - - git+https://github.com/raphaeldussin/static_downsampler - - git+https://github.com/jkrasting/xcompare - - git+https://github.com/raphaeldussin/xoverturning - - git+https://github.com/jkrasting/xwavelet - - git+https://github.com/jetesdal/xwmt - - git+https://github.com/raphaeldussin/om4labs +#- pip=23.3.1 +#- pip: +# - falwa==2.0.0 +# - cmocean +# - regionmask +# - global_land_mask +# - git+https://github.com/raphaeldussin/static_downsampler +# - git+https://github.com/jkrasting/xcompare +# - git+https://github.com/raphaeldussin/xoverturning +# - git+https://github.com/jkrasting/xwavelet +# - git+https://github.com/jetesdal/xwmt +# - git+https://github.com/raphaeldussin/om4labs diff --git a/src/conda/env_python3_base_micromamba.yml b/src/conda/env_python3_base_micromamba.yml index b3c81dd2a..6670285a1 100644 --- a/src/conda/env_python3_base_micromamba.yml +++ b/src/conda/env_python3_base_micromamba.yml @@ -37,17 +37,15 @@ dependencies: - jupyterlab=4.1.0 - bottleneck=1.3.8 - gfortran=11.3.0 -- pip=23.3.1 -- pip: - - cmocean - - regionmask - - falwa==2.0.0 - - momgrid==1.9.0 - - git+https://github.com/jkrasting/doralite - - git+https://github.com/ajdawson/gridfill - - git+https://github.com/raphaeldussin/static_downsampler - - git+https://github.com/jkrasting/xcompare - - git+https://github.com/raphaeldussin/xoverturning - - git+https://github.com/jkrasting/xwavelet - - git+https://github.com/jetesdal/xwmt - - git+https://github.com/raphaeldussin/om4labs +#- pip=23.3.1 +#- pip: +# - cmocean +# - regionmask +# - falwa==2.0.0 +# - git+https://github.com/ajdawson/gridfill +# - git+https://github.com/raphaeldussin/static_downsampler +# - git+https://github.com/jkrasting/xcompare +# - git+https://github.com/raphaeldussin/xoverturning +# - git+https://github.com/jkrasting/xwavelet +# - git+https://github.com/jetesdal/xwmt +# - git+https://github.com/raphaeldussin/om4labs diff --git 
a/tools/catalog_builder/test_builder_config.yml b/tools/catalog_builder/test_builder_config.yml new file mode 100644 index 000000000..f0e5392d4 --- /dev/null +++ b/tools/catalog_builder/test_builder_config.yml @@ -0,0 +1,31 @@ +## Configuration file template for catalog_builder +## DRS convention to use cmip | gfdl | cesm +convention: cmip +## IMPORTANT: Attempting to build a catalog of the entire contents of a pp directory will likely max out available PPAN resources (i.e., it takes longer than 12 hours to build a catalog for atmos/ts/monthly/5yr one node w/16 threads). It is strongly recommended to use include_patterns and/or exclude_patterns to target a specific subset of variables and dates to improve the performance of the catalog builder. +## Path(s) to the root directory with the target dataset +data_root_dirs: + - /Users/eriley/NOAA_POD/model_data/CESM2/tauu + - /Users/eriley/NOAA_POD/model_data/sftlf_historical/CESM2 + # - /archive/oar.gfdl.cmip6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos/ts/monthly/5yr +## (optional) dataset id used to determine parser for selected convention. 
Accepted values: am5 +# dataset_id: am5 +## depth to traverse for files from data_root_dir(s) +## (e.g., files that are in the root directory have dir_depth=1) +dir_depth: 0 +## where to write catalog csv and json header files +output_dir: /Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs +## name of catalog (.csv and .json will be appended to catalog and header files) +output_filename: esm_catalog_CESM2_historical_r1i1p1f1 +## number of threads: 16 (for job running on one analysis node) +## The example catalog for the UDA directory takes a little over 5 min to build +num_threads: 1 +## optional list of patterns to include in file and directory search +#include_patterns: +# - "*19800101-2014*.nc" +# - "*hght.nc" +# - "*slp.nc" +# - "*t_surf.nc" +# - "*t_ref.nc" +## optional list of patterns to exclude from file and directory search +exclude_patterns: + - "DO_NOT_USE"