Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ specified. If the same attribute is defined more than once, the last attribute
file specified takes precedence. Like cascading style sheets this means default
values can be given and overridden when necessary.

> [!NOTE]
> The `_FillValue` attribute of variables can be removed but not added or changed by `addmeta`.

### Dynamic templating

`addmeta` supports limited dynamic templating to allow injection of file specific
Expand Down Expand Up @@ -205,13 +208,25 @@ netCDF applications are expected to update the history attribute when modifying
the files. This can be enabled in `addmeta` with the `--update-history`
commandline argument.

### Sorting Attributes

Global and variable attributes can be sorted lexicographically, ignoring case, by `addmeta` if needed.

Global attributes can be sorted with the `-s`/`--sort` argument.

Variable attributes can be sorted with the `--sort-variable VARNAME` argument, where `VARNAME` is either the name of the variable whose attributes should be sorted or a regex that matches the variable(s) to sort.
Multiple `--sort-variable` arguments can be used to specify more than one variable name and/or regex.

> [!NOTE]
> The `_FillValue` attribute of variables cannot be sorted.

## Invocation

`addmeta` provides a command line interface. Invoking with the `-h` flag prints
a summary of how to invoke the program correctly.

$ addmeta -h
usage: addmeta [-h] [-c CMDLINEARGS] [-m METAFILES] [-l METALIST] [-d DATAFILES] [-f FNREGEX] [-s] [-v] [files ...]
usage: addmeta [-h] [-c CMDLINEARGS] [-m METAFILES] [-l METALIST] [-d DATAFILES] [-f FNREGEX] [-s] [--sort-variable SORT_VARIABLE] [-v] [files ...]

Add meta data to one or more netCDF files

Expand All @@ -231,6 +246,8 @@ a summay of how to invoke the program correctly.
-f FNREGEX, --fnregex FNREGEX
Extract metadata from filename using regex
-s, --sort Sort global attributes lexicographically, ignoring case
--sort-variable SORT_VARIABLE
Sort given variables' attributes lexicographically, ignoring case. Use variable name or regex matching names.
--update-history Update or create the history global attribute
-v, --verbose Verbose output

Expand Down
71 changes: 52 additions & 19 deletions addmeta/addmeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,21 @@ def update_history_attr(group, history, verbose=False):
group.setncattr("history", history)


def add_meta(ncfile, metadict, template_vars, sort_attrs=False, history=None, verbose=False):
def add_meta(ncfile, metadict, template_vars, sort_attrs=False, sort_vars=[], history=None, verbose=False):
Comment thread
joshuatorrance marked this conversation as resolved.
Outdated
"""
Add meta data from a dictionary to a netCDF file
"""

rootgrp = nc.Dataset(ncfile, "r+")
# Add metadata to matching variables
if "variables" in metadict:
for var, attr_dict in metadict["variables"].items():
if var in rootgrp.variables:
if varname_in_regex_list(var, sort_vars):
attr_dict = remove_update_sort_attrs(rootgrp.variables[var],
attr_dict)

for attr, value in attr_dict.items():
set_attribute(rootgrp.variables[var], attr, value, template_vars)
set_attribute(rootgrp.variables[var], attr, value, template_vars, verbose=verbose, var=var)

# Update (or create) the history attribute
if history:
Expand All @@ -126,12 +129,10 @@ def add_meta(ncfile, metadict, template_vars, sort_attrs=False, history=None, ve
# Set global meta data
if "global" in metadict:
if sort_attrs:
# Remove all global attributes, update with new attributes and then sort
# | merges two dicts preferring keys from the right
metadict['global'] = order_dict(delete_global_attributes(rootgrp) | metadict['global'])
metadict['global'] = remove_update_sort_attrs(rootgrp, metadict['global'])

for attr, value in metadict['global'].items():
set_attribute(rootgrp, attr, value, template_vars, verbose)
set_attribute(rootgrp, attr, value, template_vars, verbose=verbose)

rootgrp.close()

Expand Down Expand Up @@ -165,20 +166,24 @@ def array_to_csv(array):
else:
return f.getvalue()

def set_attribute(group, attribute, value, template_vars, verbose=False):
def set_attribute(group, attribute, value, template_vars, verbose=False, var=None,):
"""
Small wrapper to select, delete, or set attribute depending
on value passed and expand jinja template variables
"""
attr_name = f"{var}:{attribute}" if var else attribute

if value is None:
if attribute in group.__dict__:
try:
group.delncattr(attribute)
except UndefinedError as e:
warn(f"Could not delete attribute '{attribute}': {e}")
warn(f"Could not delete attribute '{attr_name}': {e}")
return
finally:
if verbose: print(f" - {attribute}")
if verbose: print(f" - {attr_name}")
else:
if verbose: print(f" - {attr_name} (nothing to delete)")
Comment thread
joshuatorrance marked this conversation as resolved.
else:
if isinstance(value, (list, tuple)):
value = array_to_csv(value)
Expand All @@ -188,10 +193,10 @@ def set_attribute(group, attribute, value, template_vars, verbose=False):
try:
value = Template(value, undefined=StrictUndefined).render(template_vars)
except UndefinedError as e:
warn(f"Skip setting attribute '{attribute}': {e}")
warn(f"Skip setting attribute '{attr_name}': {e}")
return
finally:
if verbose: print(f" + {attribute}: {value}")
if verbose: print(f" + {attr_name}: {value}")

group.setncattr(attribute, value)

Expand All @@ -209,7 +214,7 @@ def load_data_files(datafiles):

return namespace_dict

def find_and_add_meta(ncfiles, metadata, kwdata, fnregexs, sort_attrs=False, history=None, verbose=False):
def find_and_add_meta(ncfiles, metadata, kwdata, fnregexs, sort_attrs=False, sort_vars=[], history=None, verbose=False):
Comment thread
joshuatorrance marked this conversation as resolved.
Outdated
"""
Add meta data from 1 or more yaml formatted files to one or more
netCDF files
Expand All @@ -235,10 +240,27 @@ def find_and_add_meta(ncfiles, metadata, kwdata, fnregexs, sort_attrs=False, his
metadata,
template_vars,
sort_attrs=sort_attrs,
sort_vars=sort_vars,
history=history,
verbose=verbose
)


def varname_in_regex_list(varname, varname_list):
    """
    Check if the given varname matches any regex in the list of varname regexs.

    A regex must match the *whole* variable name,
    e.g. we don't want "time" to match "time_bnds".

    re.fullmatch is used instead of gluing "^" and "$" onto the pattern:
    concatenated anchors only bind to the first/last branch of an
    alternation ("time|temp" would become "^time|temp$", which matches
    "time_bnds"), and indexing the first/last character fails on an empty
    pattern. Regexs that already carry their own "^"/"$" still work.

    Parameters
    ----------
    varname : str
        Name of the variable to test.
    varname_list : iterable of str
        Variable names and/or regexs to test against.

    Returns
    -------
    bool
        True if any regex matches the whole of varname, otherwise False
        (including when varname_list is empty).
    """
    return any(re.fullmatch(pattern, varname) for pattern in varname_list)

def skip_comments(file):
"""Skip lines that begin with a comment character (#) or are empty
"""
Expand All @@ -253,15 +275,26 @@ def list_from_file(fname):

return filelist

def delete_global_attributes(rootgrp):
def remove_update_sort_attrs(ncgroup, attr_dict):
    """
    Strip the attributes off a netCDF group, overlay the supplied dictionary
    on top of the stripped attributes (the dictionary wins on clashes) and
    hand back the combined attributes in sorted order.
    """
    # Attributes are deleted from the group here and returned as a dict
    stripped = delete_group_attributes(ncgroup)

    # | merges two dicts preferring keys from the right
    combined = stripped | attr_dict

    return order_dict(combined)

def delete_group_attributes(ncgroup):
"""
Delete all global attributes and return as dict
Delete all attributes for a netCDF group and return as dict
"""
deleted = {}

for attr in rootgrp.ncattrs():
deleted[attr] = rootgrp.getncattr(attr)
rootgrp.delncattr(attr)
for attr in ncgroup.ncattrs():
# Not allow to add _FillValue as attr after variable creation
# Thus can't add it back on while sorting
if not (isinstance(ncgroup, nc.Variable) and attr == "_FillValue"):
deleted[attr] = ncgroup.getncattr(attr)
ncgroup.delncattr(attr)

return deleted

Expand Down
4 changes: 3 additions & 1 deletion addmeta/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ def parse_args(args):
parser.add_argument("-d","--datafiles", help="One or more key/value data files in YAML format", action='append')
parser.add_argument("-f","--fnregex", help="Extract metadata from filename using regex", default=[], action='append')
parser.add_argument("--datavar", help="Key/value pair to be added as data variable, e.g. --datavar 'var=value'", default=[], action='append')
parser.add_argument("-s","--sort", help="Sort all keys lexicographically, ignoring case", action="store_true")
parser.add_argument("-s","--sort", help="Sort global attributes lexicographically, ignoring case", action="store_true")
parser.add_argument("--sort-variable", help="Sort given variables' attributes lexicographically, ignoring case. Use variable name or regex matching names.", default=[], action="append")
parser.add_argument("--update-history", help="Update (or create) the history global attribute", action="store_true")
Comment thread
joshuatorrance marked this conversation as resolved.
parser.add_argument("-v","--verbose", help="Verbose output", action='store_true')
parser.add_argument("files", help="netCDF files", nargs='*')
Expand Down Expand Up @@ -111,6 +112,7 @@ def main(args):
kwdata,
args.fnregex,
sort_attrs=args.sort,
sort_vars=args.sort_variable,
history=history,
verbose=verbose,
)
Expand Down
2 changes: 1 addition & 1 deletion test/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def runcmd(cmd, rwd=None, env=None):
local_env = os.environ.copy()
if env is not None:
local_env.update(env)
subprocess.run(shlex.split(cmd),stderr=subprocess.STDOUT, cwd=cwd, env=local_env)
subprocess.run(shlex.split(cmd),stderr=subprocess.STDOUT, cwd=cwd, env=local_env, check=True)

@pytest.fixture
def make_nc(tmp_path):
Expand Down
8 changes: 8 additions & 0 deletions test/meta1_FillValue.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
global:
Publisher: "ARC Centre of Excellence for Climate System Science"
Year: 2017
# Note the following two are to test that there can be key/variable
# pairs using the two reserved names as long as they're not a dictionary
variables: "temp, salt, salinity"
global: "yes"
_FillValue: "allowed to set this for global attrs"
15 changes: 15 additions & 0 deletions test/meta_var2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
variables:
temp:
short_name: "temp"
units : "degrees Kelvin"
max : 600
min : -100
_A : "Might get sorted before _FillValue"
missing_value: "Is this varname protected like _FillValue?"
scale_factor: "Is this varname protected like _FillValue?"
add_offset: "Is this varname protected like _FillValue?"
_Netcdf4Dimid: "Is this varname protected like _FillValue?"
REFERENCE_LIST: "Is this varname protected like _FillValue?"
Times:
funky_name: "It is totall time"
limits : 0
16 changes: 16 additions & 0 deletions test/meta_var2_FillValue.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
variables:
temp:
short_name: "temp"
units : "degrees Kelvin"
max : 600
min : -100
_A : "Might get sorted before _FillValue"
missing_value: "Is this varname protected like _FillValue?"
scale_factor: "Is this varname protected like _FillValue?"
add_offset: "Is this varname protected like _FillValue?"
_Netcdf4Dimid: "Is this varname protected like _FillValue?"
REFERENCE_LIST: "Is this varname protected like _FillValue?"
_FillValue: "Not allowed to set this"
Times:
funky_name: "It is totall time"
limits : 0
56 changes: 41 additions & 15 deletions test/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def test_cmdlinearg_from_file(mock_main, touch_nc):
fnregex=["'\\d{3]\\.'", "'(?:group\\d{3])\\.nc'"],
datavar=[],
sort=False,
sort_variable=[],
verbose=False,
update_history=False,
files=touch_nc[0:2],
Expand Down Expand Up @@ -94,7 +95,7 @@ def test_missing_cmdlinearg_file():
with pytest.raises(SystemExit, match=f"Error: cmdlineargs file '{fname}' not found"):
addmeta.cli.main_parse_args(args)

def test_missing_cmdlinearg_file():
def test_missing_cmdlinearg_file2():

fname = "filedoesnotexist"

Expand All @@ -104,17 +105,42 @@ def test_missing_cmdlinearg_file():
addmeta.cli.main(addmeta.cli.main_parse_args(args))

@patch('addmeta.cli.main')
def test_datavar_option(mock_main, touch_nc):

args = ["--datavar","one=1","--datavar='two=2 words'", touch_nc[0]]

assert addmeta.cli.main_parse_args(args) == Namespace(cmdlineargs=None,
metafiles=None,
metalist=None,
datafiles=None,
fnregex=[],
datavar=['one=1', "'two=2 words'"],
sort=False,
verbose=False,
update_history=False,
files=['test/ocean_1.nc'])
@pytest.mark.parametrize("args,expected_namespace",
[
# Test datavar option
pytest.param(
["--datavar","one=1","--datavar='two=2 words'"],
Namespace(cmdlineargs=None,
metafiles=None,
metalist=None,
datafiles=None,
fnregex=[],
datavar=['one=1', "'two=2 words'"],
sort=False,
sort_variable=[],
verbose=False,
update_history=False,
files=['test/ocean_1.nc'])
),
# Test --sort-variable option
pytest.param(
["--sort-variable","var1","--sort-variable=var2", "--sort-variable", "multiple words"],
Namespace(cmdlineargs=None,
metafiles=None,
metalist=None,
datafiles=None,
fnregex=[],
datavar=[],
sort=False,
sort_variable=["var1", "var2", "multiple words"],
verbose=False,
update_history=False,
files=['test/ocean_1.nc'])
),
]
)
def test_options(mock_main, touch_nc, args, expected_namespace):

args = [*args, touch_nc[0]]

assert addmeta.cli.main_parse_args(args) == expected_namespace
33 changes: 28 additions & 5 deletions test/test_read_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,22 @@ def test_list_from_file():
fname = 'test/metalist'
filelist = list_from_file(fname)
assert(filelist == [Path('test/meta1.yaml'), Path('test/meta2.yaml')])

def test_add_meta(make_nc):
dict1 = read_metadata("test/meta1.yaml")

@pytest.mark.parametrize("global_yaml,variable_yaml",
[
("test/meta1.yaml", "test/meta_var1.yaml"),
# meta_var2.yaml includes the special attrs (similar to _FillValue)
# that are not protected by netCDF4 like _FillValue is
("test/meta1.yaml", "test/meta_var2.yaml"),
]
)
def test_add_meta(make_nc, global_yaml, variable_yaml):
dict1 = read_metadata(global_yaml)
add_meta(make_nc, dict1, {})

assert(dict1_in_dict2(dict1["global"], get_meta_data_from_file(make_nc)))

dict1 = read_metadata("test/meta_var1.yaml")
dict1 = read_metadata(variable_yaml)
add_meta(make_nc, dict1, {})

for var in dict1["variables"]:
Expand Down Expand Up @@ -150,4 +158,19 @@ def test_del_attributes(make_nc):
attributes = get_meta_data_from_file(make_nc, 'temp')
assert( '_FillValue' not in attributes )
assert( 'Tiddly' in attributes )
assert( 'Kelvin' == attributes['units'] )
assert( 'Kelvin' == attributes['units'] )

def test_find_add_meta_FillValue(make_nc):
"""
A duplicate of test_find_add_meta with _FillValue

Test that setting the global attr _FillValue is as normal and that setting
a variable _FillValue raises an exception.
"""
find_and_add_meta( [make_nc], combine_meta(['test/meta2.yaml','test/meta1_FillValue.yaml']), {}, {})

dict1 = read_metadata("test/meta1.yaml")
assert(dict1_in_dict2(dict1["global"], get_meta_data_from_file(make_nc)))

with pytest.raises(AttributeError, match="_FillValue attribute must be set when variable is created"):
find_and_add_meta( [make_nc], combine_meta(['test/meta_var2_FillValue.yaml']), {}, {} )
Loading
Loading