diff --git a/.github/workflows/run_test_suite.yml b/.github/workflows/run_test_suite.yml index 6c455210..319df681 100644 --- a/.github/workflows/run_test_suite.yml +++ b/.github/workflows/run_test_suite.yml @@ -7,10 +7,18 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + + - name: Setup python environment + run: | + pip install numpy xarray netcdf4 + - name: Load Environment run: | sudo apt-get update sudo apt install make gfortran netcdf-bin libnetcdf-dev libnetcdff-dev openmpi-bin libopenmpi-dev + - name: Run Test Suite run: | ./reg_tests/common/setup_inputdata.sh diff --git a/CVMix_tools/netcdf_comparison.py b/CVMix_tools/netcdf_comparison.py new file mode 100755 index 00000000..bced36ae --- /dev/null +++ b/CVMix_tools/netcdf_comparison.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python + +""" + Usage: + $ ./netcdf_comparison.py --baseline BASELINE_FILE --new-file NEW_FILE + --strict {exact,loose} [-r RTOL] [-a ATOL] [-t THRES] + + Use xarray and numpy to compare two netcdf files. + For each variable, flag + 1. Variables that are present in one file but not the other + 2. Variables where the data type doesn't match across files + 3. Variables where the dimensions don't match across files + 4. Variables where the missing values are not aligned + 5. Variables that differ in one of two ways (user specifies which strictness level to use): + a. Variables that are not exactly the same (--strict exact) + b. Variables that are not "close" to each other (--strict loose) + -- For values very close to 0 ( m/s + unit_conversion['cm']['m'] = 0.01 # cm -> m + unit_conversion['nmol/cm^3']['mmol/m^3'] = 1 # nmol/cm^3 -> mmol/m^3 + unit_conversion['nmol/cm^3/s']['mmol/m^3/s'] = 1 # nmol/cm^3/s -> mmol/m^3/s + unit_conversion['nmol/cm^2/s']['mmol/m^2/s'] = 0.01 # nmol/cm^2/s -> mmol/m^2/s + unit_conversion['(nmol/cm^3)^-1 s^-1']['(mmol/m^3)^-1 s^-1'] = 1 # same as nmol/cm^3 -> mmol/m^3 + unit_conversion['g/cm^3/s']['kg/m^3/s'] = 1000 # g/cm^3/s -> kg/m^3/s + unit_conversion['g/cm^2/s']['kg/m^2/s'] = 10 # g/cm^2/s -> kg/m^2/s + unit_conversion['meq/m^3']['neq/cm^3'] = 1 # meq/m^3 -> neq/cm^3 + unit_conversion['meq/m^3/s']['neq/cm^3/s'] = 1 # meq/m^3/s -> neq/cm^3/s + unit_conversion['neq/cm^2/s']['meq/m^2/s'] = 0.01 # meq/m^3 cm/s -> meq/m^2/s + unit_conversion['meq/m^3 cm/s']['neq/cm^2/s'] = 1 # meq/m^3 cm/s -> neq/cm^2/s + unit_conversion['meq/m^3 cm/s']['meq/m^2/s'] = 0.01 # meq/m^3 cm/s -> meq/m^2/s + unit_conversion['mg/m^3 cm/s']['mg/m^2/s'] = 0.01 # mg/m^3 cm/s -> mg/m^2/s + + conversion_factor = 1. + if('units' in ds_base[var].attrs and ds_new[var].attrs): + old_units = ds_base[var].attrs['units'] + new_units = ds_new[var].attrs['units'] + if old_units != new_units: + found=False + if new_units in unit_conversion and old_units in unit_conversion[new_units]: + conversion_factor = unit_conversion[new_units][old_units] + found = True + if not found: + if old_units in unit_conversion and new_units in unit_conversion[old_units]: + conversion_factor = 1. / unit_conversion[old_units][new_units] + found = True + if not found: + raise KeyError(f'Can not convert from {new_units} to {old_units} for {var}') + return conversion_factor + +def _variable_check_loose(ds_base, ds_new, rtol, atol, thres): + """ + Assumes both datasets contain the same variables with the same dimensions + Checks: + 1. Are NaNs in the same place? + 2. If baseline value = 0, then |new value| must be <= atol + 3. Absolute vs relative error: + i. If 0 < |baseline value| <= thres then want absolute difference < atol + ii. If |baseline value| > thres, then want relative difference < rtol + Note: if thres = 0 and rtol = 0, then this reduces to an exact test + (Are non-NaN values identical?) + """ + import numpy as np + + error_checking = {'var_check_count': 0} + common_vars = list(set(ds_base.variables) & set(ds_new.variables)) + common_vars.sort() + + for var in common_vars: + error_checking['messages'] = [] + + # (0) Update units of new file to match baseline + conversion_factor = _get_conversion_factor(ds_base, ds_new, var) + + # (1) Are NaNs in the same place? + if var.lower() == 'time': + continue + mask = np.isfinite(ds_base[var].data) + if np.any(mask ^ np.isfinite(ds_new[var].data)): + error_checking['messages'].append('NaNs are not in same place') + + # (2) compare everywhere that baseline is 0 + if np.any(np.where(ds_base[var].data[mask] == 0, + np.abs(ds_new[var].data[mask]) > atol, + False)): + error_checking['messages'].append( + 'Baseline is 0 at some indices where abs value ' + + 'of new data is larger than {}'.format(atol)) + + # (3i) Compare everywhere that 0 < |baseline| <= thres + base_data = np.where((ds_base[var].data[mask] != 0) & + (np.abs(ds_base[var].data[mask]) <= thres), + ds_base[var].data[mask], 0) + new_data = np.where((ds_base[var].data[mask] != 0) & + (np.abs(ds_base[var].data[mask]) <= thres), + conversion_factor*ds_new[var].data[mask], 0) + abs_err = np.abs(new_data - base_data) + if np.any(abs_err > atol): + error_checking['messages'].append("Max absolute error ({}) exceeds {}".format( + np.max(abs_err), atol)) + + # (3ii) Compare everywhere that |baseline| is > thres + base_data = np.where(np.abs(ds_base[var].data[mask]) > thres, + ds_base[var].data[mask], + 0) + new_data = np.where(np.abs(ds_base[var].data[mask]) > thres, + conversion_factor*ds_new[var].data[mask], + 0) + rel_err = _compute_rel_err(ds_base[var], base_data, new_data, mask) + if np.any(rel_err > rtol): + if rtol == 0: + abs_err = np.abs(new_data - base_data) + error_checking['messages'].append("Values are not the same everywhere\n{}".format( + " Max relative error: {}\n Max absolute error: {}".format( + np.max(rel_err), np.max(abs_err)) + )) + else: + error_checking['messages'].append("Max relative error ({}) exceeds {}".format( + np.max(rel_err), rtol)) + + error_checking['var_check_count'] += _report_errs(var, error_checking['messages']) + + return error_checking['var_check_count']>0 + +################## + +def _compute_rel_err(da_base, base_data, new_data, mask): + # denominator for relative error is local max value (3-point stencil) + # note the assumption that column is first dimension + import numpy as np + + if 'num_levels' in da_base.dims: + # For variables with a depth dimension, we use a 3-point stencil in depth to find the + # maximum value of the baseline value to use in the denominator of the relative error. + # For the top (bottom) of the column, we use a 2-point stencil with the value + # below (above) the given level + rel_denom = da_base.rolling(num_levels=3, center=True, min_periods=2).max().data[mask] + else: + # For variables without a depth dimension, the denominator is the baseline value + rel_denom = da_base.data[mask] + rel_denom = np.where(np.isfinite(rel_denom), rel_denom, 0) + return np.where(base_data != 0, np.abs(new_data - base_data), 0) / \ + np.where(rel_denom != 0, rel_denom, 1) + +################## + +def _report_errs(var, err_messages): + """ + err_messages is a list of all accumulated errors + """ + logger = logging.getLogger(__name__) + if err_messages: + logger.info("Variable: %s", var) + for err in err_messages: + logger.info("... %s", err) + logger.info("") + return True + return False + +################## + +def _parse_args(): + """ Parse command line arguments + """ + + import argparse + + parser = argparse.ArgumentParser(description="Compare two netCDF files using xarray and numpy", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + # Baseline for comparison + parser.add_argument('-b', '--baseline', action='store', dest='baseline', required=True, + help='Baseline for comparison') + + # File to compare to baseline + parser.add_argument('-n', '--new-file', action='store', dest='new_file', required=True, + help="File to compare to baseline") + + # Tolerances + parser.add_argument('--strict', choices=['exact', 'loose'], required=True, + help="Should files be bit-for-bit [exact] or within some tolerance [loose]") + + parser.add_argument('-r', '--rtol', action='store', dest='rtol', + default=DEFAULT_TOLS['rtol'], type=float, + help="Maximum allowable relative tolerance (only if strict=loose)") + + parser.add_argument('-a', '--atol', action='store', dest='atol', + default=DEFAULT_TOLS['atol'], type=float, + help="Maximum allowable absolute tolerance (only if strict=loose)") + + parser.add_argument('-t', '--thres', action='store', dest='thres', + default=DEFAULT_TOLS['thres'], type=float, + help="Threshold to switch from abs tolerance to rel (only if strict=loose)") + + return parser.parse_args() + +################## + +if __name__ == "__main__": + import os + + # Set up logging +# logging.basicConfig(format='%(levelname)s (%(funcName)s): %(message)s', level=logging.INFO) + logging.basicConfig(format='%(message)s', level=logging.INFO) + LOGGER = logging.getLogger(__name__) + + args = _parse_args() + ds_base_in, ds_new_in = _open_files(args.baseline, args.new_file) + if args.strict == 'loose': + if ds_comparison_loose(ds_base_in, ds_new_in, args.rtol, args.atol, args.thres): + LOGGER.error("Differences found between files!") + os.sys.exit(1) + LOGGER.info("PASS: All variables match and are within specified tolerance.") + if args.strict == 'exact': + if ds_comparison_exact(ds_base_in, ds_new_in): + LOGGER.error("Differences found between files!") + os.sys.exit(1) + LOGGER.info("PASS: All variables match and have exactly the same values.") diff --git a/CVMix_tools/run_test_suite.sh b/CVMix_tools/run_test_suite.sh index b5af9036..47e43f4d 100755 --- a/CVMix_tools/run_test_suite.sh +++ b/CVMix_tools/run_test_suite.sh @@ -24,6 +24,22 @@ function print_status() { ################################################# +# Run baseline comparison test +function baseline_comparison() { + testname=$(basename $PWD) + for file in `ls *.nc`; do + if [ -f ${CVMIX_ROOT}/baselines/${testname}/${file} ]; then + ${CVMIX_ROOT}/CVMix_tools/netcdf_comparison.py \ + -b ${CVMIX_ROOT}/baselines/${testname}/${file} \ + -n ${file} --strict loose + STATUS=$(check_return $?) + print_status "${testname} baseline comparison (${file})" >> $OUTFILE + fi + done +} + +################################################# + ############### # Global Vars # ############### @@ -114,7 +130,7 @@ if [ "${STATUS}" == "PASS" ]; then echo "$ ./kpp-test.sh" ./kpp-test.sh STATUS=$(check_return $?) - print_status "KPP.sh" >> $OUTFILE + print_status "KPP" >> $OUTFILE # Build stand-alone executable with netCDF cd ${CVMIX_ROOT}/src @@ -131,8 +147,53 @@ if [ "${STATUS}" == "PASS" ]; then ./Simmons-test.sh -nc STATUS=$(check_return $?) print_status "Tidal (requires netCDF)" >> $OUTFILE + if [ "${STATUS}" == "PASS" ]; then + baseline_comparison + fi + + # Bryan-Lewis baseline comparison test + cd ${CVMIX_ROOT}/reg_tests/Bryan-Lewis + echo "$ ./Bryan-Lewis-test.sh -nc" + ./Bryan-Lewis-test.sh -nc + STATUS=$(check_return $?) + print_status "Bryan Lewis (with netCDF)" >> $OUTFILE + if [ "${STATUS}" == "PASS" ]; then + baseline_comparison + ${CVMIX_ROOT}/CVMix_tools/netcdf_comparison.py -b data_memcopy.nc -n data_pointer.nc --strict exact + STATUS=$(check_return $?) + print_status "Bryan-Lewis memcopy and pointer comparison" >> $OUTFILE + fi + + # Double Diffusion baseline comparison test + cd ${CVMIX_ROOT}/reg_tests/double_diff + echo "$ ./double_diff-test.sh -nc" + ./double_diff-test.sh -nc + STATUS=$(check_return $?) + print_status "Double Diffusion (with netCDF)" >> $OUTFILE + if [ "${STATUS}" == "PASS" ]; then + baseline_comparison + fi + + # Shear baseline comparison test + cd ${CVMIX_ROOT}/reg_tests/shear + echo "$ ./shear-test.sh -nc" + ./shear-test.sh -nc + STATUS=$(check_return $?) + print_status "Shear (with netCDF)" >> $OUTFILE + if [ "${STATUS}" == "PASS" ]; then + baseline_comparison + fi + + # KPP baseline comparison test + cd ${CVMIX_ROOT}/reg_tests/kpp + echo "$ ./kpp-test.sh -nc" + ./kpp-test.sh -nc + STATUS=$(check_return $?) + print_status "KPP (with netCDF)" >> $OUTFILE + if [ "${STATUS}" == "PASS" ]; then + baseline_comparison + fi fi - fi diff --git a/baselines/Bryan-Lewis/data_memcopy.nc b/baselines/Bryan-Lewis/data_memcopy.nc new file mode 100644 index 00000000..9806264e Binary files /dev/null and b/baselines/Bryan-Lewis/data_memcopy.nc differ diff --git a/baselines/Bryan-Lewis/data_pointer.nc b/baselines/Bryan-Lewis/data_pointer.nc new file mode 100644 index 00000000..9806264e Binary files /dev/null and b/baselines/Bryan-Lewis/data_pointer.nc differ diff --git a/baselines/double_diff/data.nc b/baselines/double_diff/data.nc new file mode 100644 index 00000000..361743da Binary files /dev/null and b/baselines/double_diff/data.nc differ diff --git a/baselines/kpp/test1.nc b/baselines/kpp/test1.nc new file mode 100644 index 00000000..33030fc0 Binary files /dev/null and b/baselines/kpp/test1.nc differ diff --git a/baselines/kpp/test3.nc b/baselines/kpp/test3.nc new file mode 100644 index 00000000..84ac3430 Binary files /dev/null and b/baselines/kpp/test3.nc differ diff --git a/baselines/kpp/test4a.nc b/baselines/kpp/test4a.nc new file mode 100644 index 00000000..f0b97d6a Binary files /dev/null and b/baselines/kpp/test4a.nc differ diff --git a/baselines/kpp/test4b.nc b/baselines/kpp/test4b.nc new file mode 100644 index 00000000..cd2f033a Binary files /dev/null and b/baselines/kpp/test4b.nc differ diff --git a/baselines/kpp/test4c.nc b/baselines/kpp/test4c.nc new file mode 100644 index 00000000..cf929e15 Binary files /dev/null and b/baselines/kpp/test4c.nc differ diff --git a/baselines/kpp/test4d.nc b/baselines/kpp/test4d.nc new file mode 100644 index 00000000..cc104176 Binary files /dev/null and b/baselines/kpp/test4d.nc differ diff --git a/baselines/kpp/test5.nc b/baselines/kpp/test5.nc new file mode 100644 index 00000000..0933e5d8 Binary files /dev/null and b/baselines/kpp/test5.nc differ diff --git a/baselines/kpp/test7.nc b/baselines/kpp/test7.nc new file mode 100644 index 00000000..406be984 Binary files /dev/null and b/baselines/kpp/test7.nc differ diff --git a/baselines/shear/data_LMD.nc b/baselines/shear/data_LMD.nc new file mode 100644 index 00000000..b485f515 Binary files /dev/null and b/baselines/shear/data_LMD.nc differ diff --git a/baselines/shear/data_PP1d.nc b/baselines/shear/data_PP1d.nc new file mode 100644 index 00000000..8477becd Binary files /dev/null and b/baselines/shear/data_PP1d.nc differ diff --git a/baselines/shear/data_PP2d.nc b/baselines/shear/data_PP2d.nc new file mode 100644 index 00000000..9e007d54 Binary files /dev/null and b/baselines/shear/data_PP2d.nc differ diff --git a/baselines/tidal-Simmons/single_col.nc b/baselines/tidal-Simmons/single_col.nc new file mode 100644 index 00000000..4ca3ebba Binary files /dev/null and b/baselines/tidal-Simmons/single_col.nc differ