diff --git a/README.md b/README.md index bbc39d4..cb4dd28 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,9 @@ specified. If the same attribute is defined more than once, the last attribute file specified takes precedence. Like cascading style sheets this means default values can be given and overridden when necessary. +> [!NOTE] +> The `_FillValue` attribute of variables can be removed but not added or changed by `addmeta`. + ### Dynamic templating `addmeta` supports limited dynamic templating to allow injection of file specific @@ -205,6 +208,15 @@ netCDF applications are expected to update the history attribute when modifying the files. This can be enabled in `addmeta` with the `--update-history` commandline argument. +### Sorting Attributes + +Global and variable attributes can be sorted lexicographically ignoring-case by `addmeta` if needed. + +Sorting for all global and variable attributes can be enabled with the `-s`/`--sort` argument. + +> [!NOTE] +> The `_FillValue` attribute of variables cannot be sorted. + ## Invocation `addmeta` provides a command line interface. Invoking with the `-h` flag prints @@ -230,7 +242,7 @@ a summay of how to invoke the program correctly. One or more key/value data files in YAML format -f FNREGEX, --fnregex FNREGEX Extract metadata from filename using regex - -s, --sort Sort all keys lexicographically, ignoring case + -s, --sort Sort global and variable attributes lexicographically, ignoring case --update-history Update or create the history global attribute -v, --verbose Verbose output diff --git a/addmeta/addmeta.py b/addmeta/addmeta.py index 1c3b7f8..b4d95ad 100755 --- a/addmeta/addmeta.py +++ b/addmeta/addmeta.py @@ -110,14 +110,17 @@ def add_meta(ncfile, metadict, template_vars, sort_attrs=False, history=None, ve """ Add meta data from a dictionary to a netCDF file """ - rootgrp = nc.Dataset(ncfile, "r+") # Add metadata to matching variables if "variables" in metadict: for var, attr_dict in metadict["variables"].items(): if var in rootgrp.variables: + if sort_attrs: + attr_dict = remove_update_sort_attrs(rootgrp.variables[var], + attr_dict) + for attr, value in attr_dict.items(): - set_attribute(rootgrp.variables[var], attr, value, template_vars) + set_attribute(rootgrp.variables[var], attr, value, template_vars, verbose=verbose, var=var) # Update (or create) the history attribute if history: @@ -126,12 +129,10 @@ def add_meta(ncfile, metadict, template_vars, sort_attrs=False, history=None, ve # Set global meta data if "global" in metadict: if sort_attrs: - # Remove all global attributes, update with new attributes and then sort - # | merges two dicts preferring keys from the right - metadict['global'] = order_dict(delete_global_attributes(rootgrp) | metadict['global']) + metadict['global'] = remove_update_sort_attrs(rootgrp, metadict['global']) for attr, value in metadict['global'].items(): - set_attribute(rootgrp, attr, value, template_vars, verbose) + set_attribute(rootgrp, attr, value, template_vars, verbose=verbose) rootgrp.close() @@ -165,20 +166,24 @@ def array_to_csv(array): else: return f.getvalue() -def set_attribute(group, attribute, value, template_vars, verbose=False): +def set_attribute(group, attribute, value, template_vars, verbose=False, var=None,): """ Small wrapper to select, delete, or set attribute depending on value passed and expand jinja template variables """ + attr_name = f"{var}:{attribute}" if var else attribute + if value is None: if attribute in group.__dict__: try: group.delncattr(attribute) except UndefinedError as e: - warn(f"Could not delete attribute '{attribute}': {e}") + warn(f"Could not delete attribute '{attr_name}': {e}") return finally: - if verbose: print(f" - {attribute}") + if verbose: print(f" - {attr_name}") + else: + if verbose: print(f" - {attr_name} (nothing to delete)") else: if isinstance(value, (list, tuple)): value = array_to_csv(value) @@ -188,10 +193,10 @@ def set_attribute(group, attribute, value, template_vars, verbose=False): try: value = Template(value, undefined=StrictUndefined).render(template_vars) except UndefinedError as e: - warn(f"Skip setting attribute '{attribute}': {e}") + warn(f"Skip setting attribute '{attr_name}': {e}") return finally: - if verbose: print(f" + {attribute}: {value}") + if verbose: print(f" + {attr_name}: {value}") group.setncattr(attribute, value) @@ -238,7 +243,7 @@ def find_and_add_meta(ncfiles, metadata, kwdata, fnregexs, sort_attrs=False, his history=history, verbose=verbose ) - + def skip_comments(file): """Skip lines that begin with a comment character (#) or are empty """ @@ -253,15 +258,26 @@ def list_from_file(fname): return filelist -def delete_global_attributes(rootgrp): +def remove_update_sort_attrs(ncgroup, attr_dict): + """ + Remove the attributes from a netCDF group, merge the removed attrs with the + provided dictionary (favouring the dict) and return the sorted result. + """ + # | merges two dicts preferring keys from the right + return order_dict(delete_group_attributes(ncgroup) | attr_dict) + +def delete_group_attributes(ncgroup): """ - Delete all global attributes and return as dict + Delete all attributes for a netCDF group and return as dict """ deleted = {} - for attr in rootgrp.ncattrs(): - deleted[attr] = rootgrp.getncattr(attr) - rootgrp.delncattr(attr) + for attr in ncgroup.ncattrs(): + # Not allow to add _FillValue as attr after variable creation + # Thus can't add it back on while sorting + if not (isinstance(ncgroup, nc.Variable) and attr == "_FillValue"): + deleted[attr] = ncgroup.getncattr(attr) + ncgroup.delncattr(attr) return deleted diff --git a/addmeta/cli.py b/addmeta/cli.py index 2aa2f5f..45602da 100644 --- a/addmeta/cli.py +++ b/addmeta/cli.py @@ -49,7 +49,7 @@ def parse_args(args): parser.add_argument("-d","--datafiles", help="One or more key/value data files in YAML format", action='append') parser.add_argument("-f","--fnregex", help="Extract metadata from filename using regex", default=[], action='append') parser.add_argument("--datavar", help="Key/value pair to be added as data variable, e.g. --datavar 'var=value'", default=[], action='append') - parser.add_argument("-s","--sort", help="Sort all keys lexicographically, ignoring case", action="store_true") + parser.add_argument("-s","--sort", help="Sort global and variable attributes lexicographically, ignoring case", action="store_true") parser.add_argument("--update-history", help="Update (or create) the history global attribute", action="store_true") parser.add_argument("-v","--verbose", help="Verbose output", action='store_true') parser.add_argument("files", help="netCDF files", nargs='*') diff --git a/test/common.py b/test/common.py index 65bd61b..d798ba7 100644 --- a/test/common.py +++ b/test/common.py @@ -40,7 +40,7 @@ def runcmd(cmd, rwd=None, env=None): local_env = os.environ.copy() if env is not None: local_env.update(env) - subprocess.run(shlex.split(cmd),stderr=subprocess.STDOUT, cwd=cwd, env=local_env) + subprocess.run(shlex.split(cmd),stderr=subprocess.STDOUT, cwd=cwd, env=local_env, check=True) @pytest.fixture def make_nc(tmp_path): diff --git a/test/meta1_FillValue.yaml b/test/meta1_FillValue.yaml new file mode 100644 index 0000000..2ee708e --- /dev/null +++ b/test/meta1_FillValue.yaml @@ -0,0 +1,8 @@ +global: + Publisher: "ARC Centre of Excellence for Climate System Science" + Year: 2017 + # Note the following two are to test that there can be key/variable + # pairs using the two reserved names as long as they're not a dictionary + variables: "temp, salt, salinity" + global: "yes" + _FillValue: "allowed to set this for global attrs" diff --git a/test/meta_var2.yaml b/test/meta_var2.yaml new file mode 100644 index 0000000..a12f0d6 --- /dev/null +++ b/test/meta_var2.yaml @@ -0,0 +1,15 @@ +variables: + temp: + short_name: "temp" + units : "degrees Kelvin" + max : 600 + min : -100 + _A : "Might get sorted before _FillValue" + missing_value: "Is this varname protected like _FillValue?" + scale_factor: "Is this varname protected like _FillValue?" + add_offset: "Is this varname protected like _FillValue?" + _Netcdf4Dimid: "Is this varname protected like _FillValue?" + REFERENCE_LIST: "Is this varname protected like _FillValue?" + Times: + funky_name: "It is totall time" + limits : 0 diff --git a/test/meta_var2_FillValue.yaml b/test/meta_var2_FillValue.yaml new file mode 100644 index 0000000..f3401c3 --- /dev/null +++ b/test/meta_var2_FillValue.yaml @@ -0,0 +1,16 @@ +variables: + temp: + short_name: "temp" + units : "degrees Kelvin" + max : 600 + min : -100 + _A : "Might get sorted before _FillValue" + missing_value: "Is this varname protected like _FillValue?" + scale_factor: "Is this varname protected like _FillValue?" + add_offset: "Is this varname protected like _FillValue?" + _Netcdf4Dimid: "Is this varname protected like _FillValue?" + REFERENCE_LIST: "Is this varname protected like _FillValue?" + _FillValue: "Not allowed to set this" + Times: + funky_name: "It is totall time" + limits : 0 diff --git a/test/test_cli.py b/test/test_cli.py index f786ea2..62c90ab 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -94,7 +94,7 @@ def test_missing_cmdlinearg_file(): with pytest.raises(SystemExit, match=f"Error: cmdlineargs file '{fname}' not found"): addmeta.cli.main_parse_args(args) -def test_missing_cmdlinearg_file(): +def test_missing_cmdlinearg_file2(): fname = "filedoesnotexist" @@ -104,17 +104,26 @@ def test_missing_cmdlinearg_file(): addmeta.cli.main(addmeta.cli.main_parse_args(args)) @patch('addmeta.cli.main') -def test_datavar_option(mock_main, touch_nc): - - args = ["--datavar","one=1","--datavar='two=2 words'", touch_nc[0]] - - assert addmeta.cli.main_parse_args(args) == Namespace(cmdlineargs=None, - metafiles=None, - metalist=None, - datafiles=None, - fnregex=[], - datavar=['one=1', "'two=2 words'"], - sort=False, - verbose=False, - update_history=False, - files=['test/ocean_1.nc']) \ No newline at end of file +@pytest.mark.parametrize("args,expected_namespace", + [ + # Test datavar option + pytest.param( + ["--datavar","one=1","--datavar='two=2 words'"], + Namespace(cmdlineargs=None, + metafiles=None, + metalist=None, + datafiles=None, + fnregex=[], + datavar=['one=1', "'two=2 words'"], + sort=False, + verbose=False, + update_history=False, + files=['test/ocean_1.nc']) + ), + ] +) +def test_options(mock_main, touch_nc, args, expected_namespace): + + args = [*args, touch_nc[0]] + + assert addmeta.cli.main_parse_args(args) == expected_namespace diff --git a/test/test_read_metadata.py b/test/test_read_metadata.py index 9412d76..fcdd08d 100644 --- a/test/test_read_metadata.py +++ b/test/test_read_metadata.py @@ -105,14 +105,22 @@ def test_list_from_file(): fname = 'test/metalist' filelist = list_from_file(fname) assert(filelist == [Path('test/meta1.yaml'), Path('test/meta2.yaml')]) - -def test_add_meta(make_nc): - dict1 = read_metadata("test/meta1.yaml") + +@pytest.mark.parametrize("global_yaml,variable_yaml", + [ + ("test/meta1.yaml", "test/meta_var1.yaml"), + # meta_var2.yaml includes the special attrs (similar to _FillValue) + # that are not protected by netCDF4 like _FillValue is + ("test/meta1.yaml", "test/meta_var2.yaml"), + ] +) +def test_add_meta(make_nc, global_yaml, variable_yaml): + dict1 = read_metadata(global_yaml) add_meta(make_nc, dict1, {}) assert(dict1_in_dict2(dict1["global"], get_meta_data_from_file(make_nc))) - dict1 = read_metadata("test/meta_var1.yaml") + dict1 = read_metadata(variable_yaml) add_meta(make_nc, dict1, {}) for var in dict1["variables"]: @@ -150,4 +158,19 @@ def test_del_attributes(make_nc): attributes = get_meta_data_from_file(make_nc, 'temp') assert( '_FillValue' not in attributes ) assert( 'Tiddly' in attributes ) - assert( 'Kelvin' == attributes['units'] ) \ No newline at end of file + assert( 'Kelvin' == attributes['units'] ) + +def test_find_add_meta_FillValue(make_nc): + """ + A duplicate of test_find_add_meta with _FillValue + + Test that setting the global attr _FillValue is as normal and that setting + a variable _FillValue raises an exception. + """ + find_and_add_meta( [make_nc], combine_meta(['test/meta2.yaml','test/meta1_FillValue.yaml']), {}, {}) + + dict1 = read_metadata("test/meta1.yaml") + assert(dict1_in_dict2(dict1["global"], get_meta_data_from_file(make_nc))) + + with pytest.raises(AttributeError, match="_FillValue attribute must be set when variable is created"): + find_and_add_meta( [make_nc], combine_meta(['test/meta_var2_FillValue.yaml']), {}, {} ) diff --git a/test/test_sort.py b/test/test_sort.py index e654958..1f0cb3c 100644 --- a/test/test_sort.py +++ b/test/test_sort.py @@ -19,7 +19,6 @@ """ import pytest -import netCDF4 import xarray from addmeta import order_dict @@ -170,3 +169,45 @@ def test_multisort(use_xarray, make_xarray_nc, make_nc): assert actual == expected # Check the order of the attrs is correct assert list(actual.keys()) == list(expected.keys()) + +@pytest.mark.parametrize("yaml,attr_lists", + [ + ( + "test/meta_var1.yaml", + { + "temp": ["units", "_FillValue", "missing_value", "long_name", "short_name", "max", "min"], + "Times": ["standard_name", "units", "calendar", "funky_name", "limits"], + } + ), + # meta_var2.yaml includes "_A" which _should_ get sorted infront of "_FillValue" + # meta_var2.yaml also adds a bunch of special-ish attrs names that could have been protected but don't seem to be + ( + "test/meta_var2.yaml", + { + "temp": ["units", "_FillValue", "missing_value", "long_name", "short_name", "max", "min", "_A", "scale_factor", "add_offset", "_Netcdf4Dimid", "REFERENCE_LIST"], + "Times": ["standard_name", "units", "calendar", "funky_name", "limits"], + } + ), + ] +) +@pytest.mark.parametrize("do_sort", [True, False]) +def test_var_sorting(make_nc, yaml, attr_lists, do_sort): + """ + Check the order of variable attrs are as expected given various command lines + """ + sort_cmd = "-s" if do_sort else "" + runcmd(f"addmeta -v {sort_cmd} -m {yaml} {make_nc}") + + for varname, attr_list in attr_lists.items(): + actual = get_meta_data_from_file(make_nc, varname) + + # Sort list items ignoring case + expected_attrs_order = sorted(attr_list, key=lambda item: item.casefold()) if do_sort else attr_list + + if "_FillValue" in expected_attrs_order and do_sort: + # _FillValue is reserved and cannot be added after a Variable has been created + # Thus it cannot be sorted (i.e. removed and then added back on in order) + # So move _FillValue back to the front of the expected list + expected_attrs_order.insert(0, expected_attrs_order.pop(expected_attrs_order.index("_FillValue"))) + + assert list(actual.keys()) == expected_attrs_order diff --git a/test/test_write_templated.py b/test/test_write_templated.py index 9014617..4f36be3 100644 --- a/test/test_write_templated.py +++ b/test/test_write_templated.py @@ -70,6 +70,8 @@ def test_add_templated_meta(make_nc): assert(dict2["size"] == size_before) assert(dict2["modification_time"] == mtime_before) +# This test generates ignorable warnings about __file__.parent & .name +@pytest.mark.filterwarnings("ignore:.+'__file__' is undefined") def test_undefined_meta(make_nc): dict1 = read_yaml("test/meta_undefined.yaml")