Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ specified. If the same attribute is defined more than once, the last attribute
file specified takes precedence. Like cascading style sheets this means default
values can be given and overridden when necessary.

> [!NOTE]
> The `_FillValue` attribute of variables can be removed but not added or changed by `addmeta`.

### Dynamic templating

`addmeta` supports limited dynamic templating to allow injection of file specific
Expand Down Expand Up @@ -205,6 +208,15 @@ netCDF applications are expected to update the history attribute when modifying
the files. This can be enabled in `addmeta` with the `--update-history`
commandline argument.

### Sorting Attributes

Global and variable attributes can be sorted lexicographically ignoring-case by `addmeta` if needed.

Sorting for all global and variable attributes can be enabled with the `-s`/`--sort` argument.

> [!NOTE]
> The `_FillValue` attribute of variables cannot be sorted.

## Invocation

`addmeta` provides a command line interface. Invoking with the `-h` flag prints
Expand All @@ -230,7 +242,7 @@ a summay of how to invoke the program correctly.
One or more key/value data files in YAML format
-f FNREGEX, --fnregex FNREGEX
Extract metadata from filename using regex
-s, --sort Sort all keys lexicographically, ignoring case
-s, --sort Sort global and variable attributes lexicographically, ignoring case
--update-history Update or create the history global attribute
-v, --verbose Verbose output

Expand Down
50 changes: 33 additions & 17 deletions addmeta/addmeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,17 @@ def add_meta(ncfile, metadict, template_vars, sort_attrs=False, history=None, ve
"""
Add meta data from a dictionary to a netCDF file
"""

rootgrp = nc.Dataset(ncfile, "r+")
# Add metadata to matching variables
if "variables" in metadict:
for var, attr_dict in metadict["variables"].items():
if var in rootgrp.variables:
if sort_attrs:
attr_dict = remove_update_sort_attrs(rootgrp.variables[var],
attr_dict)

for attr, value in attr_dict.items():
set_attribute(rootgrp.variables[var], attr, value, template_vars)
set_attribute(rootgrp.variables[var], attr, value, template_vars, verbose=verbose, var=var)

# Update (or create) the history attribute
if history:
Expand All @@ -126,12 +129,10 @@ def add_meta(ncfile, metadict, template_vars, sort_attrs=False, history=None, ve
# Set global meta data
if "global" in metadict:
if sort_attrs:
# Remove all global attributes, update with new attributes and then sort
# | merges two dicts preferring keys from the right
metadict['global'] = order_dict(delete_global_attributes(rootgrp) | metadict['global'])
metadict['global'] = remove_update_sort_attrs(rootgrp, metadict['global'])

for attr, value in metadict['global'].items():
set_attribute(rootgrp, attr, value, template_vars, verbose)
set_attribute(rootgrp, attr, value, template_vars, verbose=verbose)

rootgrp.close()

Expand Down Expand Up @@ -165,20 +166,24 @@ def array_to_csv(array):
else:
return f.getvalue()

def set_attribute(group, attribute, value, template_vars, verbose=False):
def set_attribute(group, attribute, value, template_vars, verbose=False, var=None,):
"""
Small wrapper to select, delete, or set attribute depending
on value passed and expand jinja template variables
"""
attr_name = f"{var}:{attribute}" if var else attribute

if value is None:
if attribute in group.__dict__:
try:
group.delncattr(attribute)
except UndefinedError as e:
warn(f"Could not delete attribute '{attribute}': {e}")
warn(f"Could not delete attribute '{attr_name}': {e}")
return
finally:
if verbose: print(f" - {attribute}")
if verbose: print(f" - {attr_name}")
else:
if verbose: print(f" - {attr_name} (nothing to delete)")
Comment thread
joshuatorrance marked this conversation as resolved.
else:
if isinstance(value, (list, tuple)):
value = array_to_csv(value)
Expand All @@ -188,10 +193,10 @@ def set_attribute(group, attribute, value, template_vars, verbose=False):
try:
value = Template(value, undefined=StrictUndefined).render(template_vars)
except UndefinedError as e:
warn(f"Skip setting attribute '{attribute}': {e}")
warn(f"Skip setting attribute '{attr_name}': {e}")
return
finally:
if verbose: print(f" + {attribute}: {value}")
if verbose: print(f" + {attr_name}: {value}")

group.setncattr(attribute, value)

Expand Down Expand Up @@ -238,7 +243,7 @@ def find_and_add_meta(ncfiles, metadata, kwdata, fnregexs, sort_attrs=False, his
history=history,
verbose=verbose
)

def skip_comments(file):
"""Skip lines that begin with a comment character (#) or are empty
"""
Expand All @@ -253,15 +258,26 @@ def list_from_file(fname):

return filelist

def delete_global_attributes(rootgrp):
def remove_update_sort_attrs(ncgroup, attr_dict):
"""
Remove the attributes from a netCDF group, merge the removed attrs with the
provided dictionary (favouring the dict) and return the sorted result.
"""
# | merges two dicts preferring keys from the right
return order_dict(delete_group_attributes(ncgroup) | attr_dict)

def delete_group_attributes(ncgroup):
"""
Delete all global attributes and return as dict
Delete all attributes for a netCDF group and return as dict
"""
deleted = {}

for attr in rootgrp.ncattrs():
deleted[attr] = rootgrp.getncattr(attr)
rootgrp.delncattr(attr)
for attr in ncgroup.ncattrs():
# Not allow to add _FillValue as attr after variable creation
# Thus can't add it back on while sorting
if not (isinstance(ncgroup, nc.Variable) and attr == "_FillValue"):
deleted[attr] = ncgroup.getncattr(attr)
ncgroup.delncattr(attr)

return deleted

Expand Down
2 changes: 1 addition & 1 deletion addmeta/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def parse_args(args):
parser.add_argument("-d","--datafiles", help="One or more key/value data files in YAML format", action='append')
parser.add_argument("-f","--fnregex", help="Extract metadata from filename using regex", default=[], action='append')
parser.add_argument("--datavar", help="Key/value pair to be added as data variable, e.g. --datavar 'var=value'", default=[], action='append')
parser.add_argument("-s","--sort", help="Sort all keys lexicographically, ignoring case", action="store_true")
parser.add_argument("-s","--sort", help="Sort global and variable attributes lexicographically, ignoring case", action="store_true")
parser.add_argument("--update-history", help="Update (or create) the history global attribute", action="store_true")
Comment thread
joshuatorrance marked this conversation as resolved.
parser.add_argument("-v","--verbose", help="Verbose output", action='store_true')
parser.add_argument("files", help="netCDF files", nargs='*')
Expand Down
2 changes: 1 addition & 1 deletion test/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def runcmd(cmd, rwd=None, env=None):
local_env = os.environ.copy()
if env is not None:
local_env.update(env)
subprocess.run(shlex.split(cmd),stderr=subprocess.STDOUT, cwd=cwd, env=local_env)
subprocess.run(shlex.split(cmd),stderr=subprocess.STDOUT, cwd=cwd, env=local_env, check=True)

@pytest.fixture
def make_nc(tmp_path):
Expand Down
8 changes: 8 additions & 0 deletions test/meta1_FillValue.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
global:
Publisher: "ARC Centre of Excellence for Climate System Science"
Year: 2017
# Note the following two are to test that there can be key/variable
# pairs using the two reserved names as long as they're not a dictionary
variables: "temp, salt, salinity"
global: "yes"
_FillValue: "allowed to set this for global attrs"
15 changes: 15 additions & 0 deletions test/meta_var2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
variables:
temp:
short_name: "temp"
units : "degrees Kelvin"
max : 600
min : -100
_A : "Might get sorted before _FillValue"
missing_value: "Is this varname protected like _FillValue?"
scale_factor: "Is this varname protected like _FillValue?"
add_offset: "Is this varname protected like _FillValue?"
_Netcdf4Dimid: "Is this varname protected like _FillValue?"
REFERENCE_LIST: "Is this varname protected like _FillValue?"
Times:
funky_name: "It is totall time"
limits : 0
16 changes: 16 additions & 0 deletions test/meta_var2_FillValue.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
variables:
temp:
short_name: "temp"
units : "degrees Kelvin"
max : 600
min : -100
_A : "Might get sorted before _FillValue"
missing_value: "Is this varname protected like _FillValue?"
scale_factor: "Is this varname protected like _FillValue?"
add_offset: "Is this varname protected like _FillValue?"
_Netcdf4Dimid: "Is this varname protected like _FillValue?"
REFERENCE_LIST: "Is this varname protected like _FillValue?"
_FillValue: "Not allowed to set this"
Times:
funky_name: "It is totall time"
limits : 0
39 changes: 24 additions & 15 deletions test/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_missing_cmdlinearg_file():
with pytest.raises(SystemExit, match=f"Error: cmdlineargs file '{fname}' not found"):
addmeta.cli.main_parse_args(args)

def test_missing_cmdlinearg_file():
def test_missing_cmdlinearg_file2():

fname = "filedoesnotexist"

Expand All @@ -104,17 +104,26 @@ def test_missing_cmdlinearg_file():
addmeta.cli.main(addmeta.cli.main_parse_args(args))

@patch('addmeta.cli.main')
def test_datavar_option(mock_main, touch_nc):

args = ["--datavar","one=1","--datavar='two=2 words'", touch_nc[0]]

assert addmeta.cli.main_parse_args(args) == Namespace(cmdlineargs=None,
metafiles=None,
metalist=None,
datafiles=None,
fnregex=[],
datavar=['one=1', "'two=2 words'"],
sort=False,
verbose=False,
update_history=False,
files=['test/ocean_1.nc'])
@pytest.mark.parametrize("args,expected_namespace",
[
# Test datavar option
pytest.param(
["--datavar","one=1","--datavar='two=2 words'"],
Namespace(cmdlineargs=None,
metafiles=None,
metalist=None,
datafiles=None,
fnregex=[],
datavar=['one=1', "'two=2 words'"],
sort=False,
verbose=False,
update_history=False,
files=['test/ocean_1.nc'])
),
]
)
def test_options(mock_main, touch_nc, args, expected_namespace):

args = [*args, touch_nc[0]]

assert addmeta.cli.main_parse_args(args) == expected_namespace
33 changes: 28 additions & 5 deletions test/test_read_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,22 @@ def test_list_from_file():
fname = 'test/metalist'
filelist = list_from_file(fname)
assert(filelist == [Path('test/meta1.yaml'), Path('test/meta2.yaml')])

def test_add_meta(make_nc):
dict1 = read_metadata("test/meta1.yaml")

@pytest.mark.parametrize("global_yaml,variable_yaml",
[
("test/meta1.yaml", "test/meta_var1.yaml"),
# meta_var2.yaml includes the special attrs (similar to _FillValue)
# that are not protected by netCDF4 like _FillValue is
("test/meta1.yaml", "test/meta_var2.yaml"),
]
)
def test_add_meta(make_nc, global_yaml, variable_yaml):
dict1 = read_metadata(global_yaml)
add_meta(make_nc, dict1, {})

assert(dict1_in_dict2(dict1["global"], get_meta_data_from_file(make_nc)))

dict1 = read_metadata("test/meta_var1.yaml")
dict1 = read_metadata(variable_yaml)
add_meta(make_nc, dict1, {})

for var in dict1["variables"]:
Expand Down Expand Up @@ -150,4 +158,19 @@ def test_del_attributes(make_nc):
attributes = get_meta_data_from_file(make_nc, 'temp')
assert( '_FillValue' not in attributes )
assert( 'Tiddly' in attributes )
assert( 'Kelvin' == attributes['units'] )
assert( 'Kelvin' == attributes['units'] )

def test_find_add_meta_FillValue(make_nc):
"""
A duplicate of test_find_add_meta with _FillValue

Test that setting the global attr _FillValue is as normal and that setting
a variable _FillValue raises an exception.
"""
find_and_add_meta( [make_nc], combine_meta(['test/meta2.yaml','test/meta1_FillValue.yaml']), {}, {})

dict1 = read_metadata("test/meta1.yaml")
assert(dict1_in_dict2(dict1["global"], get_meta_data_from_file(make_nc)))

with pytest.raises(AttributeError, match="_FillValue attribute must be set when variable is created"):
find_and_add_meta( [make_nc], combine_meta(['test/meta_var2_FillValue.yaml']), {}, {} )
43 changes: 42 additions & 1 deletion test/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"""

import pytest
import netCDF4
import xarray

from addmeta import order_dict
Expand Down Expand Up @@ -170,3 +169,45 @@ def test_multisort(use_xarray, make_xarray_nc, make_nc):
assert actual == expected
# Check the order of the attrs is correct
assert list(actual.keys()) == list(expected.keys())

@pytest.mark.parametrize("yaml,attr_lists",
[
(
"test/meta_var1.yaml",
{
"temp": ["units", "_FillValue", "missing_value", "long_name", "short_name", "max", "min"],
"Times": ["standard_name", "units", "calendar", "funky_name", "limits"],
}
),
# meta_var2.yaml includes "_A" which _should_ get sorted infront of "_FillValue"
# meta_var2.yaml also adds a bunch of special-ish attrs names that could have been protected but don't seem to be
(
"test/meta_var2.yaml",
{
"temp": ["units", "_FillValue", "missing_value", "long_name", "short_name", "max", "min", "_A", "scale_factor", "add_offset", "_Netcdf4Dimid", "REFERENCE_LIST"],
"Times": ["standard_name", "units", "calendar", "funky_name", "limits"],
}
),
]
)
@pytest.mark.parametrize("do_sort", [True, False])
def test_var_sorting(make_nc, yaml, attr_lists, do_sort):
"""
Check the order of variable attrs are as expected given various command lines
"""
sort_cmd = "-s" if do_sort else ""
runcmd(f"addmeta -v {sort_cmd} -m {yaml} {make_nc}")

for varname, attr_list in attr_lists.items():
actual = get_meta_data_from_file(make_nc, varname)

# Sort list items ignoring case
expected_attrs_order = sorted(attr_list, key=lambda item: item.casefold()) if do_sort else attr_list

if "_FillValue" in expected_attrs_order and do_sort:
# _FillValue is reserved and cannot be added after a Variable has been created
# Thus it cannot be sorted (i.e. removed and then added back on in order)
# So move _FillValue back to the front of the expected list
expected_attrs_order.insert(0, expected_attrs_order.pop(expected_attrs_order.index("_FillValue")))

assert list(actual.keys()) == expected_attrs_order
2 changes: 2 additions & 0 deletions test/test_write_templated.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ def test_add_templated_meta(make_nc):
assert(dict2["size"] == size_before)
assert(dict2["modification_time"] == mtime_before)

# This test generates ignorable warnings about __file__.parent & .name
@pytest.mark.filterwarnings("ignore:.+'__file__' is undefined")
def test_undefined_meta(make_nc):

dict1 = read_yaml("test/meta_undefined.yaml")
Expand Down
Loading