diff --git a/README.md b/README.md index bbc39d4..7d2d52d 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ Multiple datafiles can be specified, and the variables from each will be accessi in a namespace defined by the [stem of the filename](https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem). -#### Command line +##### Command line Template variables can also be directly specified via the command line option `--datavar` and can be accessed in the special namespace `__argdata__`. For example: @@ -185,6 +185,33 @@ addmeta -d job.yaml -m meta.yaml --datavar freq='1daily' file.nc ``` Multiple variables can be defined in this way with multiple `--datavar` options. +#### Number Templates + +In order for dynamically templated attributes to resolve to integers or floats rather than strings, use the Jinja-like filter `| number`. +E.g. with the following datafile.yaml, +```yaml +integer_val: 5 +float_val: 1.234 +``` +a metadata file similar to the following can be used, +```yaml +global: + # This non-dynamic attribute resolves to an integer + this_is_a_number: 5 + # This dynamic attribute resolves to a string + this_is_a_string: "{{ datafile.integer_val }}" + # This dynamic attribute resolves to an integer + this_is_a_int: "{{ datafile.integer_val | number }}" + # These dynamic attributes resolve to floats + this_is_a_float: "{{ datafile.float_val | number }}" + this_is_also_a_float: "{{ datafile.integer_val | float | number }}" +``` + +- `| number` must be the last portion of the Jinja template (i.e. the string between `{{` and `}}`) +- `| number` is not valid Jinja itself; it is removed before the rest of the template is resolved with Jinja +- When using `| number`, `addmeta` will attempt to resolve the attribute's value to an integer first, then a float. 
+ + ### metadata.yaml support ACCESS-NRI models produce, and intake catalogues consume, a `metadata.yaml` file diff --git a/addmeta/addmeta.py b/addmeta/addmeta.py index 1c3b7f8..083c6aa 100755 --- a/addmeta/addmeta.py +++ b/addmeta/addmeta.py @@ -186,7 +186,16 @@ def set_attribute(group, attribute, value, template_vars, verbose=False): # Only valid to use jinja templates on strings if isinstance(value, str): try: + value, convert_to_number = detect_number_filter(value) + value = Template(value, undefined=StrictUndefined).render(template_vars) + + if convert_to_number: + # Try to convert to an integer first then a float + try: + value = int(value) + except ValueError: + value = float(value) except UndefinedError as e: warn(f"Skip setting attribute '{attribute}': {e}") return @@ -195,6 +204,25 @@ def set_attribute(group, attribute, value, template_vars, verbose=False): group.setncattr(attribute, value) +def detect_number_filter(value): + """ + Look for the jinja-like filter "| number". + + If found return the value string with "| number" removed and True + Otherwise return the value string and False + + - There might be multiple occurrences of "| number" + - Number of whitespace characters is unknown + - Only a "| number" directly followed by "}}" is removed; any other + "| number" is left in place for Jinja to reject + """ + # Match "| number }}" with any amount of whitespace between, keeping the + # trailing whitespace and "}}" in a group so they can be put back + regx = re.compile(r"\|\s*number(\s*}})") + # subn replaces only the anchored matches and reports how many were found + value, count = regx.subn(r"\1", value) + return value, count > 0 + def serialise_dict_values(dictionary): """Serialise any list or arrays values in a dictionary""" return {k: array_to_csv(v) if isinstance(v, (tuple, list)) else v for k, v in dictionary.items()} diff --git a/test/test_write_templated.py b/test/test_write_templated.py index 9014617..f219952 100644 --- a/test/test_write_templated.py +++ 
b/test/test_write_templated.py @@ -21,10 +21,11 @@ from datetime import datetime, timezone, timedelta from pathlib import Path -import netCDF4 as nc +import numpy as np +import jinja2 import pytest -from addmeta import read_yaml, read_metadata, add_meta, find_and_add_meta, isoformat +from addmeta import read_yaml, read_metadata, add_meta, find_and_add_meta, isoformat, detect_number_filter from common import runcmd, make_nc, get_meta_data_from_file verbose = True @@ -286,3 +287,212 @@ def test_now(make_nc): meta_now = datetime.strptime(now_str, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc) utc_now = datetime.now(timezone.utc) assert meta_now - utc_now < timedelta(minutes=1) + +@pytest.mark.parametrize( + "metadata,templates,expected,number_type", + [ + # Test a raw number + ({"n": 5}, {}, {"n": 5}, np.int32), + # Test a templated integer + ( + {"n": "{{ __template__.x | number }}"}, + {"__template__": {"x": "5"}}, + {"n": 5}, + np.int32 + ), + # Test a templated integer cast to a float with jinja + ( + {"n": "{{ __template__.x | float | number }}"}, + {"__template__": {"x": "5"}}, + {"n": 5}, + np.float64 + ), + # Test a templated integer with no spaces + ( + {"n": "{{__template__.x|number}}"}, + {"__template__": {"x": "5"}}, + {"n": 5}, + np.int32 + ), + # Test a templated integer with excessive spaces + ( + {"n": "{{ __template__.x | number}}"}, + {"__template__": {"x": "5"}}, + {"n": 5}, + np.int32 + ), + # Test a templated integer with newline and tab characters + ( + {"n": "{{ __template__.x|\t\nnumber\n}}"}, + {"__template__": {"x": "5"}}, + {"n": 5}, + np.int32 + ), + # Test multiple "| number" filters + ( + {"n": "{{ __template__.x | number}}{{ __template__.x | number}}"}, + {"__template__": {"x": "5"}}, + {"n": 55}, + np.int32 + ), + # Test multiple "| number" filters with varying whitespace + ( + {"n": "{{__template__.x|number}}{{ __template__.x | number}}{{ __template__.x | number}}"}, + {"__template__": {"x": "5"}}, + {"n": 555}, + np.int32 + ), + # Test a 
templated integer without the jinja brackets + ( + {"n": "__template__.x | number"}, + {"__template__": {"x": "5"}}, + {"n": "__template__.x | number"}, + str + ), + # Test a templated integer with underscored notation + ( + {"n": "{{ __template__.x | number }}"}, + {"__template__": {"x": "5_000_000"}}, + {"n": 5000000}, + np.int32 + ), + # Test a templated float + ( + {"n": "{{ __template__.x | number }}"}, + {"__template__": {"x": "5.1"}}, + {"n": 5.1}, + np.float64 + ), + # Test a templated float that happens to be preceded by digits + ( + {"n": "123{{ __template__.x | number }}"}, + {"__template__": {"x": "5.1"}}, + {"n": 1235.1}, + np.float64 + ), + # Test a templated float with no digits after the decimal point + ( + {"n": "{{ __template__.x | number }}"}, + {"__template__": {"x": "5."}}, + {"n": 5.}, + np.float64 + ), + # Test a templated float in exponential notation + ( + {"n": "{{ __template__.x | number }}"}, + {"__template__": {"x": "1.5e5"}}, + {"n": 1.5e5}, + np.float64 + ), + # Test a templated number without the dummy jinja filter + ( + {"n": "{{ __template__.x }}"}, + {"__template__": {"x": "5.1"}}, + {"n": "5.1"}, + str + ), + ] +) +def test_number_templates(make_nc, metadata, templates, expected, number_type): + # Put the metadata under global + metadata = {"global": metadata} + + # Add the attrs from make_nc to expected + common_attrs = { + "Publisher": "Will be overwritten", + "unlikelytobeoverwritten": "total rubbish", + } + expected.update(common_attrs) + + print(metadata) + print(templates) + find_and_add_meta([make_nc], metadata, templates, []) + + actual = get_meta_data_from_file(make_nc) + + assert actual == expected + assert isinstance(actual["n"], number_type) + +@pytest.mark.parametrize( + "metadata,templates,failure_str", + [ + # Test a string with the dummy number jinja filter + ( + {"n": "{{ __template__.x | number }}"}, + {"__template__": {"x": "five"}}, + None + ), + # Test a malformed float + ( + {"n": "{{ __template__.x | number }}"}, + 
{"__template__": {"x": "5.1.2"}}, + None + ), + # Test a valid float but with a string around it + ( + {"n": "xx{{ __template__.x | number }}xx"}, + {"__template__": {"x": "5.1"}}, + "xx5.1xx" + ), + ] +) +def test_number_templates_failures(make_nc, metadata, templates, failure_str): + # Put the metadata under global + metadata = {"global": metadata} + + value = templates["__template__"]["x"] + + # If failure string hasn't been supplied just use the template value + failure_str = failure_str if failure_str else value + with pytest.raises(ValueError, match=f"could not convert string to float: \'{failure_str}\'"): + find_and_add_meta([make_nc], metadata, templates, []) + +@pytest.mark.parametrize( + "metadata,templates", + [ + # Test a real jinja filter placed after "| number", so number is not last + ( + {"n": "{{ __template__.x | number | float }}"}, + {"__template__": {"x": "5.1"}}, + ), + ] +) +def test_number_templates_failure_filter_order(make_nc, metadata, templates): + # Put the metadata under global + metadata = {"global": metadata} + + with pytest.raises(jinja2.exceptions.TemplateAssertionError, match="No filter named \'number\'"): + find_and_add_meta([make_nc], metadata, templates, []) + +@pytest.mark.parametrize( + "input_string,expected", + [ + # No "| number }}" + ("", ("", False)), + ("nothing to see here", ("nothing to see here", False)), + ("| number", ("| number", False)), + # "| number }}" with various whitespace + ("{{|number}}", ("{{}}", True)), + ("{{ | number}}", ("{{ }}", True)), + ("{{ |\tnumber }}", ("{{ }}", True)), + ("{{ |\n\tnumber\n }}", ("{{ \n }}", True)), + # A typical expected pattern for | number + ("{{ x | number }}", ("{{ x }}", True)), + # Multiple templates + ("{{ x | number }}{{ x | number }}{{ x | number }}", ("{{ x }}{{ x }}{{ x }}", True)), + # Multiple templates with varying whitespace + ("{{ x | number }}{{ x | number }}{{ x | number }}", ("{{ x }}{{ x }}{{ x }}", True)), + ("{{ x | number }}{{ x |\tnumber\t}}{{ x |\nnumber\n}}", ("{{ x }}{{ 
x \t}}{{ x \n}}", True)), + # Stuff outside the template + ("something infront {{ x | number}}", ("something infront {{ x }}", True)), + ] +) +def test_detect_number_filter(input_string, expected): + """ + Test detect_number_filter + + The function looks for "| number }}" with varying whitespace, removes "| number" and returns (string, found) + """ + actual = detect_number_filter(input_string) + + assert actual == expected