diff --git a/fre/cmor/tests/test_cmor_run_subtool.py b/fre/cmor/tests/test_cmor_run_subtool.py index 135c645aa..3bf212627 100644 --- a/fre/cmor/tests/test_cmor_run_subtool.py +++ b/fre/cmor/tests/test_cmor_run_subtool.py @@ -9,6 +9,8 @@ import subprocess import shutil +import netCDF4 +import numpy as np import pytest from fre.cmor import cmor_run_subtool @@ -57,6 +59,57 @@ def test_setup_cmor_cmip_table_repo(): FULL_OUTPUTFILE = \ f"{FULL_OUTPUTDIR}/sos_Omon_PCMDI-test-1-0_piControl-withism_r3i1p1f1_{GRID_LABEL}_{DATETIMES_INPUTFILE}.nc" +# CMIP6-required global attributes that must be present in CMOR output +CMIP6_REQUIRED_GLOBAL_ATTRS = [ + 'variable_id', 'mip_era', 'table_id', + 'experiment_id', 'institution_id', 'source_id' +] + + +def _assert_data_matches(ds_in, ds_out): + ''' + helper: assert that science variable data, coordinate data, and shapes + are preserved between input and CMOR output datasets. + ''' + # the science variable data must be preserved exactly + assert np.array_equal(ds_in.variables['sos'][:], ds_out.variables['sos'][:]), \ + "sos data values differ between input and CMOR output" + + # coordinate data must be preserved + assert np.allclose(ds_in.variables['lat'][:], ds_out.variables['lat'][:]), \ + "latitude data differs between input and CMOR output" + assert np.allclose(ds_in.variables['lon'][:], ds_out.variables['lon'][:]), \ + "longitude data differs between input and CMOR output" + assert np.allclose(ds_in.variables['time'][:], ds_out.variables['time'][:]), \ + "time data differs between input and CMOR output" + + # variable shapes must be preserved + assert ds_in.variables['sos'][:].shape == ds_out.variables['sos'][:].shape, \ + "sos data shape differs between input and CMOR output" + + +def _assert_metadata_matches(ds_in, ds_out): + ''' + helper: assert that CMIP6-required global attributes are present and that + key variable-level metadata is preserved between input and CMOR output datasets. + ''' + # CMOR output must contain CMIP6-required global attributes + for required_attr in CMIP6_REQUIRED_GLOBAL_ATTRS: + assert required_attr in ds_out.ncattrs(), \ + f"CMOR output missing required global attribute '{required_attr}'" + + # science variable standard_name and long_name must be preserved + assert ds_in.variables['sos'].standard_name == ds_out.variables['sos'].standard_name, \ + "sos standard_name differs between input and CMOR output" + assert ds_in.variables['sos'].long_name == ds_out.variables['sos'].long_name, \ + "sos long_name differs between input and CMOR output" + + # _FillValue and missing_value must be preserved + assert ds_in.variables['sos']._FillValue == ds_out.variables['sos']._FillValue, \ + "sos _FillValue differs between input and CMOR output" + assert ds_in.variables['sos'].missing_value == ds_out.variables['sos'].missing_value, \ + "sos missing_value differs between input and CMOR output" + def test_setup_fre_cmor_run_subtool(capfd): ''' @@ -124,24 +177,13 @@ def test_fre_cmor_run_subtool_case1_output_compare_data(capfd): print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') print(f'FULL_INPUTFILE={FULL_INPUTFILE}') - nccmp_cmd= [ "nccmp", "-f", "-d", - f"{FULL_INPUTFILE}", - f"{FULL_OUTPUTFILE}" ] - print(f"via subprocess, running {' '.join(nccmp_cmd)}") - result = subprocess.run( ' '.join(nccmp_cmd), - shell=True, - check=False, - capture_output=True - ) + with netCDF4.Dataset(FULL_INPUTFILE) as ds_in, \ + netCDF4.Dataset(FULL_OUTPUTFILE) as ds_out: + # file formats should differ: CMOR converts input to NETCDF4_CLASSIC + assert ds_in.file_format != ds_out.file_format, \ + f"expected file formats to differ, got input={ds_in.file_format}, output={ds_out.file_format}" - # err_list has length two if end in newline - err_list = result.stderr.decode().split('\n') - expected_err = \ - "DIFFER : FILE FORMATS : NC_FORMAT_NETCDF4 <> NC_FORMAT_NETCDF4_CLASSIC" - assert all( [result.returncode == 1, - len(err_list)==2, - '' in err_list, - expected_err in err_list ] ) + _assert_data_matches(ds_in, ds_out) _out, _err = capfd.readouterr() def test_fre_cmor_run_subtool_case1_output_compare_metadata(capfd): @@ -149,16 +191,13 @@ def test_fre_cmor_run_subtool_case1_output_compare_metadata(capfd): print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') print(f'FULL_INPUTFILE={FULL_INPUTFILE}') - nccmp_cmd= [ "nccmp", "-f", "-m", "-g", - f"{FULL_INPUTFILE}", - f"{FULL_OUTPUTFILE}" ] - print(f"via subprocess, running {' '.join(nccmp_cmd)}") - result = subprocess.run( ' '.join(nccmp_cmd), - shell=True, - check=False - ) + with netCDF4.Dataset(FULL_INPUTFILE) as ds_in, \ + netCDF4.Dataset(FULL_OUTPUTFILE) as ds_out: + # CMOR processing should add/change global attributes + assert set(ds_in.ncattrs()) != set(ds_out.ncattrs()), \ + "expected global attributes to differ between input and CMOR output" - assert result.returncode == 1 + _assert_metadata_matches(ds_in, ds_out) _out, _err = capfd.readouterr() @@ -253,22 +292,13 @@ def test_fre_cmor_run_subtool_case2_output_compare_data(capfd): print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') print(f'FULL_INPUTFILE_DIFF={FULL_INPUTFILE_DIFF}') - nccmp_cmd= [ "nccmp", "-f", "-d", - f"{FULL_INPUTFILE_DIFF}", - f"{FULL_OUTPUTFILE}" ] - print(f"via subprocess, running {' '.join(nccmp_cmd)}") - result = subprocess.run( ' '.join(nccmp_cmd), - shell=True, - check=False, - capture_output=True - ) - - err_list = result.stderr.decode().split('\n')#length two if end in newline - expected_err="DIFFER : FILE FORMATS : NC_FORMAT_NETCDF4 <> NC_FORMAT_NETCDF4_CLASSIC" - assert all( [result.returncode == 1, - len(err_list)==2, - '' in err_list, - expected_err in err_list ] ) + with netCDF4.Dataset(FULL_INPUTFILE_DIFF) as ds_in, \ + netCDF4.Dataset(FULL_OUTPUTFILE) as ds_out: + # file formats should differ: CMOR converts input to NETCDF4_CLASSIC + assert ds_in.file_format != ds_out.file_format, \ + f"expected file formats to differ, got input={ds_in.file_format}, output={ds_out.file_format}" + + _assert_data_matches(ds_in, ds_out) _out, _err = capfd.readouterr() def test_fre_cmor_run_subtool_case2_output_compare_metadata(capfd): @@ -276,16 +306,13 @@ def test_fre_cmor_run_subtool_case2_output_compare_metadata(capfd): print(f'FULL_OUTPUTFILE={FULL_OUTPUTFILE}') print(f'FULL_INPUTFILE_DIFF={FULL_INPUTFILE_DIFF}') - nccmp_cmd= [ "nccmp", "-f", "-m", "-g", - f"{FULL_INPUTFILE_DIFF}", - f"{FULL_OUTPUTFILE}" ] - print(f"via subprocess, running {' '.join(nccmp_cmd)}") - result = subprocess.run( ' '.join(nccmp_cmd), - shell=True, - check=False - ) + with netCDF4.Dataset(FULL_INPUTFILE_DIFF) as ds_in, \ + netCDF4.Dataset(FULL_OUTPUTFILE) as ds_out: + # CMOR processing should add/change global attributes + assert set(ds_in.ncattrs()) != set(ds_out.ncattrs()), \ + "expected global attributes to differ between input and CMOR output" - assert result.returncode == 1 + _assert_metadata_matches(ds_in, ds_out) _out, _err = capfd.readouterr() def test_git_cleanup(): diff --git a/fre/tests/test_files/CMOR_input_example.json b/fre/tests/test_files/CMOR_input_example.json index b7726c674..17e295eec 100644 --- a/fre/tests/test_files/CMOR_input_example.json +++ b/fre/tests/test_files/CMOR_input_example.json @@ -44,4 +44,4 @@ "#output_path_template": "Template for output path directory using tables keys or global attributes, these should follow the relevant data reference syntax", "output_path_template": "<_member_id>", "output_file_template": "
<_member_id>" -} +} \ No newline at end of file