diff --git a/docs/src/conf.py b/docs/src/conf.py index 4d8fcce56b..82a38a60b8 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -405,6 +405,7 @@ def _dotv(version): "https://twitter.com/scitools_iris", "https://stackoverflow.com/questions/tagged/python-iris", "https://www.flaticon.com/", + "https://www.mail-archive.com/dri-devel@lists.sourceforge.net/msg39091.html", ] # list of sources to exclude from the build. diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index b243796082..55ef5d49f0 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -73,6 +73,12 @@ This document explains the changes made to Iris for this release render text in the bottom right of the plot figure. (:issue:`6247`, :pull:`6332`) +#. `@trexfeathers`_ and `@pp-mo`_ added :const:`iris.loading.LOAD_PROBLEMS` to + capture objects that could not be loaded correctly, increasing transparency + and helping users to fix loading problems via the Iris API. As a first pass, + this is currently limited to ``standard_name`` and dimension coordinates from + NetCDF files. (:issue:`6317`, :pull:`6338`) + 🐛 Bugs Fixed ============= @@ -99,6 +105,19 @@ This document explains the changes made to Iris for this release Once conversion from unittest to pytest is completed, :class:`iris.tests.IrisTest` class will be deprecated. +#. `@trexfeathers`_ and `@pp-mo`_ significantly changed Iris' NetCDF loading + warnings as part of the :const:`~iris.loading.LOAD_PROBLEMS` work. Several + loading operations no longer raise their own warnings; instead their loading + problems are captured in :const:`~iris.loading.LOAD_PROBLEMS`. When + :const:`~iris.loading.LOAD_PROBLEMS` is populated, a single + :class:`~iris.warnings.IrisLoadWarning` is raised; see below. + As a first pass, this changed behaviour is currently limited to + ``standard_name`` and dimension coordinates from NetCDF files. (:pull:`6338`) + + .. 
code-block:: none + + IrisLoadWarning: Not all file objects were parsed correctly. See iris.loading.LOAD_PROBLEMS for details. + 🚀 Performance Enhancements =========================== @@ -166,7 +185,7 @@ This document explains the changes made to Iris for this release necessary. (:issue:`6285`, :pull:`6288`) #. `@trexfeathers`_ improved the handling of benchmark environments, especially - when working across Python versions. (:pull:`6329`) + when working across Python versions. (:pull:`6329`) #. `@trexfeathers`_ temporarily pinned Sphinx to `<8.2`. (:pull:`6344`, :issue:`6345`) diff --git a/lib/iris/common/mixin.py b/lib/iris/common/mixin.py index e238ab9d36..dae10abc9b 100644 --- a/lib/iris/common/mixin.py +++ b/lib/iris/common/mixin.py @@ -73,7 +73,6 @@ class LimitedAttributeDict(dict): """ - #: Attributes with special CF meaning, forbidden in Iris attribute dictionaries. CF_ATTRS_FORBIDDEN = ( "standard_name", "long_name", @@ -94,6 +93,15 @@ class LimitedAttributeDict(dict): "scale_factor", "_FillValue", ) + """Attributes with special CF meaning, forbidden in Iris attribute dictionaries.""" + + IRIS_RAW = "IRIS_RAW" + """Key used by Iris to store ALL attributes when problems are encountered during loading. + + See Also + -------- + iris.loading.LOAD_PROBLEMS: The destination for captured loading problems. + """ def __init__(self, *args, **kwargs): dict.__init__(self, *args, **kwargs) diff --git a/lib/iris/fileformats/_nc_load_rules/actions.py b/lib/iris/fileformats/_nc_load_rules/actions.py index c65483a908..09d149837e 100644 --- a/lib/iris/fileformats/_nc_load_rules/actions.py +++ b/lib/iris/fileformats/_nc_load_rules/actions.py @@ -45,6 +45,7 @@ from iris.config import get_logger import iris.fileformats.cf import iris.fileformats.pp as pp +from iris.loading import LOAD_PROBLEMS import iris.warnings from . 
import helpers as hh @@ -104,6 +105,10 @@ def inner(engine, *args, **kwargs): @action_function def action_default(engine): """Perform standard operations for every cube.""" + # Future pattern (iris#6319). + hh.build_and_add_names(engine) + + # Legacy pattern. hh.build_cube_metadata(engine) @@ -286,6 +291,7 @@ def action_build_dimension_coordinate(engine, providescoord_fact): cf_var = engine.cf_var.cf_group[var_name] rule_name = f"fc_build_coordinate_({coord_type})" coord_grid_class, coord_name = _COORDTYPE_GRIDTYPES_AND_COORDNAMES[coord_type] + succeed = None if coord_grid_class is None: # Coordinates not identified with a specific grid-type class (latlon, # rotated or projected) are always built, but can have no coord-system. @@ -367,9 +373,28 @@ def action_build_dimension_coordinate(engine, providescoord_fact): assert coord_grid_class in grid_classes if succeed: - hh.build_dimension_coordinate( + hh.build_and_add_dimension_coordinate( engine, cf_var, coord_name=coord_name, coord_system=coord_system ) + + else: + message = f"Dimension coordinate {var_name} not created. 
Debug info:\n" + if succeed is None: + message += "An unexpected error occurred" + error = NotImplementedError(message) + else: + message += rule_name + error = ValueError(message) + + try: + raise error + except error.__class__ as error: + _ = LOAD_PROBLEMS.record( + filename=engine.filename, + loaded=hh.build_raw_cube(cf_var, engine.filename), + exception=error, + ) + return rule_name diff --git a/lib/iris/fileformats/_nc_load_rules/engine.py b/lib/iris/fileformats/_nc_load_rules/engine.py index 48092508a4..60b6de596e 100644 --- a/lib/iris/fileformats/_nc_load_rules/engine.py +++ b/lib/iris/fileformats/_nc_load_rules/engine.py @@ -19,6 +19,10 @@ """ +from iris.coords import _DimensionalMetadata +from iris.cube import Cube +from iris.fileformats.cf import CFDataVariable + from .actions import run_actions @@ -74,6 +78,11 @@ class Engine: """ + cf_var: CFDataVariable | None + cube: Cube | None + cube_parts: dict[str, list[tuple[_DimensionalMetadata, str]]] | None + filename: str | None + def __init__(self): """Init new engine.""" self.reset() diff --git a/lib/iris/fileformats/_nc_load_rules/helpers.py b/lib/iris/fileformats/_nc_load_rules/helpers.py index bd9d625b8f..9c1884d716 100644 --- a/lib/iris/fileformats/_nc_load_rules/helpers.py +++ b/lib/iris/fileformats/_nc_load_rules/helpers.py @@ -16,8 +16,10 @@ from __future__ import annotations +import contextlib +from functools import partial import re -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, Any, List, Optional import warnings import cf_units @@ -28,13 +30,15 @@ import iris from iris._deprecation import warn_deprecated import iris.aux_factory -from iris.common.mixin import _get_valid_standard_name +from iris.common.mixin import LimitedAttributeDict, _get_valid_standard_name import iris.coord_systems import iris.coords +from iris.cube import Cube import iris.exceptions import iris.fileformats.cf as cf import iris.fileformats.netcdf from iris.fileformats.netcdf.loader import 
_get_cf_var_data +from iris.loading import LOAD_PROBLEMS, LoadProblems import iris.std_names import iris.util import iris.warnings @@ -44,6 +48,8 @@ from iris.fileformats.cf import CFBoundaryVariable + from .engine import Engine + # TODO: should un-addable coords / cell measures / etcetera be skipped? iris#5068. # @@ -453,26 +459,194 @@ def parse_cell_methods(nc_cell_methods, cf_name=None): return tuple(cell_methods) +def _add_or_capture( + build_func: partial, + add_method: partial, + filename: str, + cf_var: iris.fileformats.cf.CFVariable, + attr_key: Optional[str] = None, +) -> Optional[LoadProblems.Problem]: + """Build & add objects to the Cube, capturing problem objects - common code. + + Problems are captured in :const:`iris.loading.LOAD_PROBLEMS`. + + Parameters + ---------- + build_func : ``functools.partial`` + A function that builds the object-to-be-added. Passed as a + :class:`~functools.partial` instance so + that argument complexities can be handled by the caller, while execution + is deferred until the appropriate time within :func:`_add_or_capture`. + The passed :class:`~functools.partial` instance must have ALL arguments + already bound, and when called it must return the object that will be + added to the Cube. + add_method : ``functools.partial`` + A function that takes the object returned by `build_func` and adds it to + the Cube. Passed as a :class:`~functools.partial` instance to allow + further arguments to be bound by the caller. + filename : str + The ``filename`` attribute of the + :class:`iris.fileformats._nc_load_rules.engine.Engine` that is handling + the loading. This is the name of the file being loaded. + cf_var : iris.fileformats.cf.CFVariable + The CFVariable object that provides the info for building the + object-to-be-added. Used in case of an error, to build the most basic + :class:`~iris.cube.Cube` possible - for adding to + :const:`iris.loading.LOAD_PROBLEMS`. 
+ attr_key : str, optional + The attribute-of-interest on `cf_var`, if applicable. For example: in + some cases we are building a coordinate using the entire of `cf_var` - + no `attr_key` needed - but in other cases we are 'building' a + standard_name by getting this key from `cf_var`. + + Returns + ------- + iris.loading.LoadProblems.Problem or None + The captured problem, if any; the same object that is added to + :const:`iris.loading.LOAD_PROBLEMS`. + + See Also + -------- + iris.loading.LoadProblems.Problem: The type of the returned object. + iris.loading.LOAD_PROBLEMS: The destination for captured problems. + """ + captured: Cube | dict[str, Any] | None = None + load_problems_entry: LoadProblems.Problem | None = None + + try: + built = build_func() + + except Exception as exc_build: + # Problems CREATING the desired object. + # Fully suppress further problems since we're just trying to do our + # best to capture objects IF possible. + if attr_key is not None: + captured_attr = None + with contextlib.suppress(AttributeError): + captured_attr = getattr(cf_var, attr_key) + captured = {attr_key: captured_attr} + else: + with contextlib.suppress(Exception): + captured = build_raw_cube(cf_var, filename) + + load_problems_entry = LOAD_PROBLEMS.record( + filename=filename, + loaded=captured, + exception=exc_build, + ) + + else: + try: + add_method(built) + except Exception as exc_add: + # Problems ADDING the built object to the Cube. + if attr_key is not None: + captured = {attr_key: built} + else: + captured = built + + load_problems_entry = LOAD_PROBLEMS.record( + filename=filename, loaded=captured, exception=exc_add + ) + + return load_problems_entry + + +################################################################################ +def build_raw_cube(cf_var: cf.CFVariable, filename: str) -> Cube: + """Build the most basic Cube possible - used as a 'last resort' fallback.""" + # TODO: dataless Cubes might be an opportunity for _get_cf_var_data() to return None? 
+    data = _get_cf_var_data(cf_var, filename)
+    raw_attributes = {key: value for key, value in cf_var.cf_attrs()}
+    # Not a real attribute, but this is 'Iris language'.
+    raw_attributes["var_name"] = cf_var.cf_name
+    attributes = {LimitedAttributeDict.IRIS_RAW: raw_attributes}
+    return Cube(data=data, attributes=attributes)
+
+
+################################################################################
+# TODO: propagate the build-and-add pattern to all other objects (iris#6319).
+
+
+def _build_name_standard(cf_var: cf.CFVariable) -> str | None:
+    value = getattr(cf_var, CF_ATTR_STD_NAME, None)
+    if value is not None:
+        standard_name = _get_valid_standard_name(value)
+    else:
+        standard_name = value
+    return standard_name
+
+
+def _build_name_long(cf_var: cf.CFVariable) -> str | None:
+    return getattr(cf_var, CF_ATTR_LONG_NAME, None)
+
+
+def _build_name_var(cf_var: cf.CFVariable) -> str | None:
+    return cf_var.cf_name
+
+
+def build_and_add_names(engine: Engine) -> None:
+    """Add standard_, long_, var_name to the cube."""
+    assert engine.cf_var is not None
+    assert engine.cube is not None
+    assert engine.filename is not None
+
+    def setter(attr_name):
+        return partial(setattr, engine.cube, attr_name)
+
+    problem = _add_or_capture(
+        build_func=partial(_build_name_standard, engine.cf_var),
+        add_method=setter("standard_name"),
+        filename=engine.filename,
+        cf_var=engine.cf_var,
+        attr_key=CF_ATTR_STD_NAME,
+    )
+    if problem is not None and hasattr(problem.loaded, "get"):
+        assert isinstance(problem.loaded, dict)
+        invalid_std_name = problem.loaded.get(CF_ATTR_STD_NAME)
+    else:
+        invalid_std_name = None
+
+    long_name_kwargs = dict(
+        add_method=setter("long_name"),
+        filename=engine.filename,
+        cf_var=engine.cf_var,
+        attr_key=CF_ATTR_LONG_NAME,
+    )
+    _ = _add_or_capture(
+        build_func=partial(_build_name_long, engine.cf_var),
+        **long_name_kwargs,
+    )
+
+    # Store as long_name if there is space, or as attribute if not.
+ if invalid_std_name is not None: + if engine.cube.long_name is None: + _ = _add_or_capture( + build_func=partial(lambda: invalid_std_name), + **long_name_kwargs, + ) + else: + # TODO: should this be reserved for the attributes builder (iris#6319)? + engine.cube.attributes["invalid_standard_name"] = invalid_std_name + + _ = _add_or_capture( + build_func=partial(_build_name_var, engine.cf_var), + add_method=setter("var_name"), + filename=engine.filename, + cf_var=engine.cf_var, + attr_key="cf_name", + ) + + ################################################################################ def build_cube_metadata(engine): """Add the standard meta data to the cube.""" cf_var = engine.cf_var cube = engine.cube - # Determine the cube's name attributes - cube.var_name = cf_var.cf_name - standard_name = getattr(cf_var, CF_ATTR_STD_NAME, None) - long_name = getattr(cf_var, CF_ATTR_LONG_NAME, None) - cube.long_name = long_name - - if standard_name is not None: - try: - cube.standard_name = _get_valid_standard_name(standard_name) - except ValueError: - if cube.long_name is not None: - cube.attributes["invalid_standard_name"] = standard_name - else: - cube.long_name = standard_name + # Note: name building has been moved to the build_name_* functions. + # So `action_default` now calls both this *and* the new `build_and_add_names`. + # All other code will follow in future (iris#6319). # Determine the cube units. 
attr_units = get_attr_units(cf_var, cube.attributes) @@ -1094,13 +1268,14 @@ def _normalise_bounds_units( ################################################################################ -def build_dimension_coordinate( - engine, cf_coord_var, coord_name=None, coord_system=None -): +def _build_dimension_coordinate( + filename: str, + cf_coord_var: cf.CFCoordinateVariable, + coord_name: Optional[str] = None, + coord_system: Optional[iris.coord_systems.CoordSystem] = None, +) -> iris.coords.Coord: """Create a dimension coordinate (DimCoord) and add it to the cube.""" - cf_var = engine.cf_var - cube = engine.cube - attributes = {} + attributes: dict[str, Any] = {} attr_units = get_attr_units(cf_coord_var, attributes) points_data = cf_coord_var[:] @@ -1148,22 +1323,11 @@ def build_dimension_coordinate( points_data, modulus_value, bounds=bounds_data ) - # Determine the name of the dimension/s shared between the CF-netCDF data variable - # and the coordinate being built. - common_dims = [dim for dim in cf_coord_var.dimensions if dim in cf_var.dimensions] - data_dims = None - if common_dims: - # Calculate the offset of each common dimension. - data_dims = [cf_var.dimensions.index(dim) for dim in common_dims] - # Determine the standard_name, long_name and var_name standard_name, long_name, var_name = get_names(cf_coord_var, coord_name, attributes) - coord_skipped_msg = f"{cf_coord_var.cf_name} coordinate not added to Cube: " - coord_skipped_msg += "{error}" - coord_skipped = False - # Create the coordinate. + coord: iris.coords.DimCoord | iris.coords.AuxCoord try: coord = iris.coords.DimCoord( points_data, @@ -1177,16 +1341,20 @@ def build_dimension_coordinate( circular=circular, climatological=climatological, ) - except ValueError as e_msg: + except ValueError as dim_error: # Attempt graceful loading. - msg = ( - "Failed to create {name!r} dimension coordinate: {error}\n" - "Gracefully creating {name!r} auxiliary coordinate instead." 
+ coord_var_name = str(cf_coord_var.cf_name) + dim_error.add_note( + f"Failed to create {coord_var_name} dimension coordinate:\n" + f"Gracefully creating {coord_var_name!r} auxiliary coordinate instead." ) - warnings.warn( - msg.format(name=str(cf_coord_var.cf_name), error=e_msg), - category=_WarnComboDefaultingCfLoad, + # NOTE: add entry directly - does not fit the pattern for `_add_or_capture`. + _ = LOAD_PROBLEMS.record( + filename=filename, + loaded=build_raw_cube(cf_coord_var, filename), + exception=dim_error, ) + coord = iris.coords.AuxCoord( points_data, standard_name=standard_name, @@ -1198,32 +1366,71 @@ def build_dimension_coordinate( coord_system=coord_system, climatological=climatological, ) + + return coord + + +def _add_dimension_coordinate( + engine: Engine, + cf_coord_var: cf.CFCoordinateVariable, + coord: iris.coords.DimCoord | iris.coords.AuxCoord, +) -> None: + assert engine.cf_var is not None + assert engine.cube is not None + assert engine.cube_parts is not None + + # Determine the name of the dimension/s shared between the CF-netCDF + # data variable and the coordinate being built. + common_dims = [ + dim for dim in cf_coord_var.dimensions if dim in engine.cf_var.dimensions + ] + data_dims = None + if common_dims: + # Calculate the offset of each common dimension. + data_dims = [int(engine.cf_var.dimensions.index(dim)) for dim in common_dims] + + if hasattr(coord, "circular") and data_dims is not None: + # Appease MyPy. The check itself uses duck typing to avoid any + # silent errors when Mocking. + assert isinstance(coord, iris.coords.DimCoord) try: - cube.add_aux_coord(coord, data_dims) - except iris.exceptions.CannotAddError as e_msg: - warnings.warn( - coord_skipped_msg.format(error=e_msg), - category=iris.warnings.IrisCannotAddWarning, + (data_dim,) = data_dims + except ValueError: + message = ( + "Expected single dimension for dimension coordinate " + f"{coord.var_name}, got: {data_dims}." 
) - coord_skipped = True + raise ValueError(message) + engine.cube.add_dim_coord(coord, data_dim) else: - # Add the dimension coordinate to the cube. - try: - if data_dims: - cube.add_dim_coord(coord, data_dims) - else: - # Scalar coords are placed in the aux_coords container. - cube.add_aux_coord(coord, data_dims) - except iris.exceptions.CannotAddError as e_msg: - warnings.warn( - coord_skipped_msg.format(error=e_msg), - category=iris.warnings.IrisCannotAddWarning, - ) - coord_skipped = True + # Should work fine for scalar coords - data_dims passed as None. + engine.cube.add_aux_coord(coord, data_dims) - if not coord_skipped: - # Update the coordinate to CF-netCDF variable mapping. - engine.cube_parts["coordinates"].append((coord, cf_coord_var.cf_name)) + # Update the coordinate to CF-netCDF variable mapping. + engine.cube_parts["coordinates"].append((coord, cf_coord_var.cf_name)) + + +# TODO: propagate the the build-and-add pattern to all other objects (iris#6319). +def build_and_add_dimension_coordinate( + engine: Engine, + cf_coord_var: cf.CFCoordinateVariable, + coord_name: Optional[str] = None, + coord_system: Optional[iris.coord_systems.CoordSystem] = None, +): + assert engine.filename is not None + + _ = _add_or_capture( + build_func=partial( + _build_dimension_coordinate, + engine.filename, + cf_coord_var, + coord_name, + coord_system, + ), + add_method=partial(_add_dimension_coordinate, engine, cf_coord_var), + filename=engine.filename, + cf_var=cf_coord_var, + ) ################################################################################ diff --git a/lib/iris/loading.py b/lib/iris/loading.py index 03a395ca8d..055af155a3 100644 --- a/lib/iris/loading.py +++ b/lib/iris/loading.py @@ -4,8 +4,15 @@ # See LICENSE in the root of the repository for full licensing details. 
"""Iris general file loading mechanism.""" +from dataclasses import dataclass import itertools -from typing import Iterable +import threading +from traceback import TracebackException +from typing import Any, Iterable +import warnings + +from iris.common import CFVariableMixin +from iris.warnings import IrisLoadWarning def _generate_cubes(uris, callback, constraints): @@ -276,3 +283,324 @@ def load_raw(uris, constraints=None, callback=None): with _raw_structured_loading(): return _load_collection(uris, constraints, callback).cubes() + + +class LoadProblems(threading.local): + """A collection of objects that could not be loaded correctly. + + Structured as a list - accessed via :attr:`LoadProblems.problems` - of + :class:`LoadProblems.Problem` instances; see :class:`LoadProblems.Problem` + for more details of the recorded content. + + Provided to increase transparency (problem objects are not simply + discarded), and to make it possible to fix loading problems without leaving + the Iris API. + + Expected usage is via the global :const:`LOAD_PROBLEMS` instance; see the + example below. + + Examples + -------- + .. dropdown:: (expand to see setup) + + .. + Necessary as NumPy docstring doctests do not allow labelled + testsetup/testcleanup, so this setup was clashing with other + doctests in the same module. + + **This section is not necessary for understanding the examples.** + + >>> from pathlib import Path + >>> from pprint import pprint + >>> import sys + >>> import warnings + + >>> import cf_units + >>> import iris + >>> import iris.common + >>> import iris.coords + >>> from iris.fileformats._nc_load_rules import helpers + >>> import iris.loading + >>> from iris import std_names + + >>> # Ensure doctests actually see Warnings that are raised, and that + >>> # they have a relative path (so a test pass is not machine-dependent). 
+ >>> showwarning_original = warnings.showwarning + >>> warnings.filterwarnings("default") + >>> IRIS_FILE = Path(iris.__file__) + >>> def custom_warn(message, category, filename, lineno, file=None, line=None): + ... filepath = Path(filename) + ... filename = str(filepath.relative_to(IRIS_FILE.parents[1])) + ... sys.stdout.write(warnings.formatwarning(message, category, filename, lineno)) + >>> warnings.showwarning = custom_warn + + >>> build_dimension_coordinate_original = helpers._build_dimension_coordinate + + >>> def raise_example_error_dim(filename, cf_coord_var, coord_name, coord_system): + ... if cf_coord_var.cf_name == "time": + ... raise ValueError("Example dimension coordinate error") + ... else: + ... return build_dimension_coordinate_original( + ... filename, cf_coord_var, coord_name, coord_system + ... ) + + >>> helpers._build_dimension_coordinate = raise_example_error_dim + >>> air_temperature = std_names.STD_NAMES.pop("air_temperature") + >>> iris.FUTURE.date_microseconds = True + + For this example we have 'booby-trapped' the Iris loading process to force + errors to occur. When we load our first cube, we see the warning that + :const:`LOAD_PROBLEMS` has been added to: + + >>> cube_a1b = iris.load_cube(iris.sample_data_path("A1B_north_america.nc")) + iris/...IrisLoadWarning: Not all file objects were parsed correctly. See iris.loading.LOAD_PROBLEMS for details. 
+ warnings.warn(message, category=IrisLoadWarning) + + Remember that Python by default suppresses duplicate warnings, so a second + load action does not raise another: + + >>> cube_e1 = iris.load_cube(iris.sample_data_path("E1_north_america.nc")) + + Examining the contents of :const:`LOAD_PROBLEMS` we can see that both files + experienced some problems: + + >>> problems_by_file = iris.loading.LOAD_PROBLEMS.problems_by_file + >>> print([Path(filename).name for filename in problems_by_file.keys()]) + ['A1B_north_america.nc', 'E1_north_america.nc'] + + Printing the A1B cube shows that the time dimension coordinate is missing: + + >>> print(cube_a1b.summary(shorten=True)) + air_temperature / (K) (-- : 240; latitude: 37; longitude: 49) + + A more detailed summary is available by printing :const:`LOAD_PROBLEMS`: + + >>> print(iris.loading.LOAD_PROBLEMS) + : + .../A1B_north_america.nc: "'air_temperature' is not a valid standard_name", {'standard_name': 'air_temperature'} + .../A1B_north_america.nc: "Example dimension coordinate error", unknown / (unknown) (-- : 240) + .../E1_north_america.nc: "'air_temperature' is not a valid standard_name", {'standard_name': 'air_temperature'} + .../E1_north_america.nc: "Example dimension coordinate error", unknown / (unknown) (-- : 240) + + + Below demonstrates how to explore the captured stack traces in detail: + + >>> (a1b_full_name,) = [ + ... filename for filename in problems_by_file.keys() + ... if Path(filename).name == "A1B_north_america.nc" + ... ] + >>> A1B = problems_by_file[a1b_full_name] + >>> for problem in A1B: + ... 
print(problem.stack_trace.exc_type_str) + ValueError + ValueError + + >>> last_problem = A1B[-1] + >>> print("".join(last_problem.stack_trace.format())) + Traceback (most recent call last): + File ..., in _add_or_capture + built = build_func() + File ..., in raise_example_error_dim + ValueError: Example dimension coordinate error + + + :const:`LOAD_PROBLEMS` also captures the 'raw' information in the object + that could not be loaded - the time dimension coordinate. This is captured + as a :class:`~iris.cube.Cube`: + + >>> print(last_problem.loaded) + unknown / (unknown) (-- : 240) + Attributes:... + IRIS_RAW {'axis': 'T', ...} + + Using ``last_problem.loaded``, we can manually reconstruct the missing + dimension coordinate: + + >>> attributes = last_problem.loaded.attributes[ + ... iris.common.LimitedAttributeDict.IRIS_RAW + ... ] + >>> pprint(attributes) + {'axis': 'T', + 'bounds': 'time_bnds', + 'calendar': '360_day', + 'standard_name': 'time', + 'units': 'hours since 1970-01-01 00:00:00', + 'var_name': 'time'} + + >>> units = cf_units.Unit(attributes["units"], calendar=attributes["calendar"]) + >>> dim_coord = iris.coords.DimCoord( + ... points=last_problem.loaded.data, + ... standard_name=attributes["standard_name"], + ... units=units, + ... ) + >>> cube_a1b.add_dim_coord(dim_coord, 0) + >>> print(cube_a1b.summary(shorten=True)) + air_temperature / (K) (time: 240; latitude: 37; longitude: 49) + + Note that we were unable to reconstruct the missing bounds - ``time_bnds`` - + demonstrating that this error handling is a 'best effort' and not perfect. We + hope to continually improve it over time. + + .. dropdown:: (expand to see cleanup) + + .. + Necessary as NumPy docstring doctests do not allow labelled + testsetup/testcleanup, so this cleanup was clashing with other doctests + in the same module. 
+ + **This section is not necessary for understanding the examples.** + + >>> warnings.showwarning = showwarning_original + >>> warnings.filterwarnings("ignore") + >>> helpers._build_dimension_coordinate = build_dimension_coordinate_original + >>> std_names.STD_NAMES["air_temperature"] = air_temperature + + """ + + @dataclass + class Problem: + """A single object that could not be loaded correctly.""" + + filename: str + """The file path/URL that contained the problem object.""" + + loaded: CFVariableMixin | dict[str, Any] | None + """The object that experienced loading problems. + + Four possible types: + + - :class:`~iris.cube.Cube`: if problems occurred while building a + :class:`~iris.common.mixin.CFVariableMixin` - currently the only + handled case is :class:`~iris.coords.DimCoord` - then the information + will be stored in a 'bare bones' :class:`~iris.cube.Cube` containing + only the :attr:`~iris.cube.Cube.data` array and the attributes. The + attributes are un-parsed (they can still contain ``_FillValue`` + etcetera), and are stored under a special key in the Cube + :attr:`~iris.cube.Cube.attributes` dictionary: + :attr:`~iris.common.mixin.LimitedAttributeDict.IRIS_RAW`. + - :class:`dict`: if problems occurred while building objects from NetCDF + attributes - currently the only handled cases are ``standard_name``, + ``long_name``, ``var_name``. The dictionary key is the key of the + attribute, and the value is the raw attribute returned by the + ``netCDF4`` library. + - Built objects, such as :class:`~iris.coords.DimCoord`: if the object + was built successfully, but could not be added to the Cube being + loaded. + - ``None``: if a loading error occurred, but problems occurred while + trying to store the problem object. + """ + + stack_trace: TracebackException + """The traceback exception that was raised during loading. 
+ + This instance contains rich information to support user-specific + workflows, e.g: + + - ``"".join(stack_trace.format())``: the full stack trace as a string - + the same way this would be seen at the command line. + - ``stack_trace.exc_type_str``: the exception type e.g. + :class:`ValueError`. + """ + + def __str__(self): + if hasattr(self.loaded, "summary"): + loaded = self.loaded.summary(shorten=True) + else: + loaded = self.loaded + return f'{self.filename}: "{self.stack_trace}", {loaded}' + + def __init__(self): + super().__init__() + self._problems: list[LoadProblems.Problem] = [] + + def __str__(self): + lines = [ + self.__repr__() + ":", + *[f" {problem}" for problem in self.problems], + ] + return "\n".join(lines) + + @property + def problems(self) -> list[Problem]: + """All recorded :class:`LoadProblems.Problem` instances.""" + return self._problems + + @property + def problems_by_file(self) -> dict[str, list[Problem]]: + """All recorded :class:`LoadProblems.Problem` instances, organised by filename. + + Returns + ------- + dict[str, list[LoadProblems.Problem]] + A dictionary with filenames as keys and lists of + :class:`LoadProblems.Problem` instances as values. + """ + by_file: dict[str, list[LoadProblems.Problem]] = {} + for problem in self.problems: + by_file.setdefault(problem.filename, []).append(problem) + return by_file + + # TODO: context manager method in future. + + def record( + self, + filename: str, + loaded: CFVariableMixin | dict[str, Any] | None, + exception: BaseException, + ) -> Problem: + """Record a problem object that could not be loaded correctly. + + The arguments passed will be used to create a + :class:`LoadProblems.Problem` instance - see that docstring for more + details. + + Parameters + ---------- + filename : str + The file path/URL that contained the problem object. + loaded : CFVariableMixin | dict[str, Any] | None + The object that experienced loading problems. 
See + :attr:`LoadProblems.Problem.loaded` for details on possible values. + exception : Exception + The traceback exception that was raised during loading. + + Returns + ------- + LoadProblems.Problem + The recorded load problem. + """ + stack_trace = TracebackException.from_exception(exception) + problem = LoadProblems.Problem(filename, loaded, stack_trace) + self._problems.append(problem) + + # Python's default warning behaviour means this will only be raised + # once, regardless of the number of warnings. + message = ( + "Not all file objects were parsed correctly. See " + "iris.loading.LOAD_PROBLEMS for details." + ) + warnings.warn(message, category=IrisLoadWarning) + + return problem + + def reset(self, filename: str | None = None) -> None: + """Remove all recorded :class:`LoadProblems.Problem` instances. + + Parameters + ---------- + filename : str, optional + If provided, only remove problems for this filename. + """ + if filename is None: + self._problems.clear() + else: + self._problems = [ + problem for problem in self.problems if problem.filename != filename + ] + + +LOAD_PROBLEMS = LoadProblems() +"""The global run-time instance of :class:`LoadProblems`. + +See :class:`LoadProblems` for more details. +""" diff --git a/lib/iris/tests/integration/netcdf/test_general.py b/lib/iris/tests/integration/netcdf/test_general.py index fbe7aaa391..896f1d5d6c 100644 --- a/lib/iris/tests/integration/netcdf/test_general.py +++ b/lib/iris/tests/integration/netcdf/test_general.py @@ -30,6 +30,7 @@ # Get the netCDF4 module, but in a sneaky way that avoids triggering the "do not import # netCDF4" check in "iris.tests.test_coding_standards.test_netcdf4_import()". 
import iris.fileformats.netcdf._thread_safe_nc as threadsafe_nc +from iris.loading import LOAD_PROBLEMS import iris.warnings nc = threadsafe_nc.netCDF4 @@ -319,7 +320,7 @@ def test_netcdf_with_no_constraint(self): class TestSkippedCoord: # If a coord/cell measure/etcetera cannot be added to the loaded Cube, a - # Warning is raised and the coord is skipped. + # Warning is raised and the coord is stored in iris.loading.LOAD_PROBLEMS. # This 'catching' is generic to all CannotAddErrors, but currently the only # such problem that can exist in a NetCDF file is a mismatch of dimensions # between phenomenon and coord. @@ -356,12 +357,13 @@ def create_nc_file(self, tmp_path): self.nc_path.unlink() def test_lat_not_loaded(self): - # iris#5068 includes discussion of possible retention of the skipped - # coords in the future. - with pytest.warns(match="Missing data dimensions for multi-valued DimCoord"): + with pytest.warns(match="Not all file objects were parsed correctly"): cube = iris.load_cube(self.nc_path) with pytest.raises(iris.exceptions.CoordinateNotFoundError): _ = cube.coord("lat") + load_problem = LOAD_PROBLEMS.problems[-1] + assert isinstance(load_problem.loaded, iris.coords.DimCoord) + assert load_problem.loaded.name() == "latitude" @tests.skip_data diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__grid_mappings.py b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__grid_mappings.py index 8c2e30a902..75b387feaa 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__grid_mappings.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__grid_mappings.py @@ -13,6 +13,7 @@ import iris.coord_systems as ics import iris.fileformats._nc_load_rules.helpers as hh +from iris.loading import LOAD_PROBLEMS from iris.tests.unit.fileformats.nc_load_rules.actions import Mixin__nc_load_actions @@ -255,6 +256,7 @@ def check_result( yco_is_aux=False, xco_stdname=True, yco_stdname=True, + load_problems_regex=None, ): 
"""Check key properties of a result cube. @@ -334,6 +336,10 @@ def check_result( else: self.assertEqual(yco_cs, cube_cs) + if load_problems_regex is not None: + load_problem = LOAD_PROBLEMS.problems[-1] + self.assertRegex(str(load_problem.stack_trace), load_problems_regex) + class Test__grid_mapping(Mixin__grid_mapping, tests.IrisTest): # Various testcases for translation of grid-mappings @@ -517,9 +523,10 @@ def test_mapping_unsupported(self): # 006 : fc_build_coordinate_(projection_x)(FAILED projected coord with non-projected cs) # Notes: # * NO grid-mapping is identified (or coord-system built) - # * There is no warning for this : it fails silently. - # TODO: perhaps there _should_ be a warning in such cases ? - result = self.run_testcase(mapping_type_name=hh.CF_GRID_MAPPING_AZIMUTHAL) + warn_regex = "Not all file objects were parsed correctly." + result = self.run_testcase( + mapping_type_name=hh.CF_GRID_MAPPING_AZIMUTHAL, warning_regex=warn_regex + ) self.check_result(result, cube_no_cs=True, cube_no_xycoords=True) def test_mapping_undefined(self): @@ -532,10 +539,10 @@ def test_mapping_undefined(self): # 004 : fc_provides_coordinate_(projection_x) # 005 : fc_build_coordinate_(projection_y)(FAILED projected coord with non-projected cs) # 006 : fc_build_coordinate_(projection_x)(FAILED projected coord with non-projected cs) - # Notes: - # * There is no warning for this : it fails silently. - # TODO: perhaps there _should_ be a warning in such cases ? - result = self.run_testcase(mapping_type_name="unknown") + warn_regex = "Not all file objects were parsed correctly." 
+ result = self.run_testcase( + mapping_type_name="unknown", warning_regex=warn_regex + ) self.check_result(result, cube_no_cs=True, cube_no_xycoords=True) # @@ -579,6 +586,7 @@ def test_mapping__mismatch__latlon_coords_rotated_system(self): xco_units="degrees_east", yco_name="latitude", yco_units="degrees_north", + warning_regex="Not all file objects were parsed correctly.", ) self.check_result(result, cube_no_cs=True, cube_no_xycoords=True) @@ -636,6 +644,7 @@ def test_mapping__mismatch__rotated_coords_latlon_system(self): xco_units="degrees", yco_name="grid_latitude", yco_units="degrees", + warning_regex="Not all file objects were parsed correctly.", ) self.check_result(result, cube_no_cs=True, cube_no_xycoords=True) @@ -842,9 +851,12 @@ def test_nondim_lats(self): # Notes: # * in terms of rule triggering, this is not distinct from the # "normal" case : but latitude is now created as an aux-coord. - warning = "must be.* monotonic" - result = self.run_testcase(warning_regex=warning, yco_values=[0.0, 0.0]) - self.check_result(result, yco_is_aux=True) + error = "must be.* monotonic" + result = self.run_testcase( + yco_values=[0.0, 0.0], + warning_regex="Not all file objects were parsed correctly.", + ) + self.check_result(result, yco_is_aux=True, load_problems_regex=error) if __name__ == "__main__": diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__latlon_dimcoords.py b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__latlon_dimcoords.py index 15401a2ca1..0694ebe250 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__latlon_dimcoords.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__latlon_dimcoords.py @@ -14,7 +14,9 @@ import iris.tests as tests # isort: skip +from iris.common import LimitedAttributeDict from iris.coord_systems import GeogCS, RotatedGeogCS +from iris.loading import LOAD_PROBLEMS from iris.tests.unit.fileformats.nc_load_rules.actions import Mixin__nc_load_actions @@ -33,6 
+35,9 @@ def setUp(self): assert islat in (0, 1) self.unrotated_name = "latitude" if islat else "longitude" self.rotated_name = "grid_latitude" if islat else "grid_longitude" + self.projected_name = ( + "projection_y_coordinate" if islat else "projection_x_coordinate" + ) self.unrotated_units = "degrees_north" if islat else "degrees_east" # Note: there are many alternative valid forms for the rotated units, # but we are not testing that here. @@ -152,6 +157,14 @@ def check_result( elif crs == "rotated": self.assertIsInstance(coord_crs, RotatedGeogCS, context_message) + def check_load_problem(self, setup_kwargs, expected_msg): + # Check that the expected load problem is stored. + _ = self.run_testcase(**setup_kwargs) + load_problem = LOAD_PROBLEMS.problems[-1] + attributes = load_problem.loaded.attributes[LimitedAttributeDict.IRIS_RAW] + self.assertEqual(attributes["standard_name"], setup_kwargs["standard_name"]) + self.assertRegex("".join(load_problem.stack_trace.format()), expected_msg) + # # Testcase routines # @@ -287,6 +300,35 @@ def test_stdname_rotated_gridmapping(self): ) self.check_result(result, self.rotated_name, None, None, "rotated") + def test_fail_latlon(self): + self.check_load_problem( + dict( + standard_name=self.unrotated_name, + grid_mapping="rotated", + warning_regex="Not all file objects were parsed correctly.", + ), + "FAILED : latlon coord with rotated cs", + ) + + def test_fail_rotated(self): + self.check_load_problem( + dict( + standard_name=self.rotated_name, + grid_mapping="latlon", + warning_regex="Not all file objects were parsed correctly.", + ), + "FAILED rotated coord with latlon cs", + ) + + def test_fail_projected(self): + self.check_load_problem( + dict( + standard_name="projection_x_coordinate", + warning_regex="Not all file objects were parsed correctly.", + ), + "FAILED projected coord with non-projected cs", + ) + class Test__longitude_coords(Mixin_latlon_dimcoords, tests.IrisTest): lat_1_or_lon_0 = 0 diff --git 
a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__time_coords.py b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__time_coords.py index 24c5ebfa6d..ab7eedb7e8 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__time_coords.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/actions/test__time_coords.py @@ -14,6 +14,7 @@ import iris.tests as tests # isort: skip from iris.coords import AuxCoord, DimCoord +from iris.loading import LOAD_PROBLEMS from iris.tests.unit.fileformats.nc_load_rules.actions import Mixin__nc_load_actions @@ -161,7 +162,9 @@ def _make_testcase_cdl( """ return cdl_string - def check_result(self, cube, time_is="dim", period_is="missing"): + def check_result( + self, cube, time_is="dim", period_is="missing", load_problems_regex=None + ): """Check presence of expected dim/aux-coords in the result cube. Both of 'time_is' and 'period_is' can take values 'dim', 'aux' or @@ -214,6 +217,10 @@ def check_result(self, cube, time_is="dim", period_is="missing"): elif period_is == "aux": self.assertIsInstance(period_auxcos[0], AuxCoord) + if load_problems_regex is not None: + load_problem = LOAD_PROBLEMS.problems[-1] + self.assertRegex(str(load_problem.stack_trace), load_problems_regex) + class Mixin__singlecoord__tests(Mixin__timecoords__common): # Coordinate tests to be run for both 'time' and 'period' coordinate vars. @@ -251,7 +258,7 @@ def run_testcase(self, coord_dim_name=None, **opts): return result - def check_result(self, cube, coord_is="dim"): + def check_result(self, cube, coord_is="dim", load_problems_regex=None): """Specialise 'check_result' for single-coord 'time' or 'period' testing.""" # Pass generic 'coord_is' option to parent as time/period options. 
which = self.which @@ -264,7 +271,12 @@ def check_result(self, cube, coord_is="dim"): period_is = coord_is time_is = "missing" - super().check_result(cube, time_is=time_is, period_is=period_is) + super().check_result( + cube, + time_is=time_is, + period_is=period_is, + load_problems_regex=load_problems_regex, + ) # # Generic single-coordinate testcases. @@ -302,9 +314,12 @@ def test_dim_nonmonotonic(self): # 001 : fc_default # 002 : fc_provides_coordinate_(time[[_period]]) # 003 : fc_build_coordinate_(time[[_period]]) - msg = "Failed to create.* dimension coordinate" - result = self.run_testcase(values_all_zero=True, warning_regex=msg) - self.check_result(result, "aux") + msg = "must be.* monotonic" + result = self.run_testcase( + values_all_zero=True, + warning_regex="Not all file objects were parsed correctly.", + ) + self.check_result(result, "aux", load_problems_regex=msg) def test_dim_fails_typeident(self): # Provide a coord variable, identified as a CFDimensionCoordinate by diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test__add_or_capture.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test__add_or_capture.py new file mode 100644 index 0000000000..819963cbbd --- /dev/null +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test__add_or_capture.py @@ -0,0 +1,170 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Test function :func:`iris.fileformats._nc_load_rules.helpers._add_or_capture`.""" + +from unittest.mock import MagicMock + +import pytest + +from iris.fileformats._nc_load_rules import helpers +from iris.fileformats.cf import CFVariable +from iris.loading import LOAD_PROBLEMS, LoadProblems + + +class Mixin: + build_func: MagicMock + add_method: MagicMock + cf_var: MagicMock + + filename: str = "test__add_or_capture.nc" + attr_key: str = "attr_key" + attr_value: str = "attr_value" + + @pytest.fixture + def make_args(self, mocker): + self.build_func = mocker.MagicMock() + self.build_func.return_value = "BUILT" + self.add_method = mocker.MagicMock() + self.cf_var = mocker.MagicMock(spec=CFVariable) + setattr(self.cf_var, self.attr_key, self.attr_value) + + def call( + self, + filename=None, + attr_key=None, + ): + result = helpers._add_or_capture( + build_func=self.build_func, + add_method=self.add_method, + filename=filename or self.filename, + cf_var=self.cf_var, + attr_key=attr_key, + ) + return result + + +class TestBuildProblems(Mixin): + @pytest.fixture(autouse=True) + def _setup(self, make_args): + LOAD_PROBLEMS.reset() + self.failure_string = "FAILED: BUILD" + self.build_func.side_effect = ValueError(self.failure_string) + + @pytest.fixture + def patch_build_raw_cube(self, mocker): + patch = mocker.patch.object(helpers, "build_raw_cube", return_value="RAW_CUBE") + yield patch + + @pytest.fixture + def cause_build_raw_cube_error(self, patch_build_raw_cube): + patch_build_raw_cube.side_effect = ValueError("FAILED") + yield + patch_build_raw_cube.side_effect = None + + def common_test(self, attr_key, expected_loaded): + result = self.call(attr_key=attr_key) + self.build_func.assert_called_once() + + assert isinstance(result, LoadProblems.Problem) + assert result.filename == self.filename + assert result.loaded == expected_loaded + assert str(result.stack_trace) == self.failure_string + assert result is LOAD_PROBLEMS.problems[-1] + + def 
test_w_o_attr_can_build(self, patch_build_raw_cube): + self.common_test( + attr_key=None, + expected_loaded=patch_build_raw_cube.return_value, + ) + + def test_w_o_attr_cannot_build(self, cause_build_raw_cube_error): + self.common_test( + attr_key=None, + expected_loaded=None, + ) + + def test_w_attr_can_find(self): + self.common_test( + attr_key=self.attr_key, expected_loaded={self.attr_key: self.attr_value} + ) + + def test_w_attr_cannot_find(self): + self.common_test( + attr_key="standard_name", + expected_loaded={"standard_name": None}, + ) + + def test_multiple_problems_same_file(self): + results = [self.call() for _ in range(3)] + for ix, problem in enumerate(LOAD_PROBLEMS.problems): + assert problem.filename == self.filename + assert problem is results[ix] + + def test_multiple_problems_diff_file(self): + names = [f"test__add_or_capture_{ix}.nc" for ix in range(3)] + results = [self.call(filename=name) for name in names] + problems_by_file = LOAD_PROBLEMS.problems_by_file + for ix, (problem_file, problems) in enumerate(problems_by_file.items()): + assert problem_file == names[ix] + for jx, problem in enumerate(problems): + assert problem is results[ix] + + +class TestAddProblems(Mixin): + @pytest.fixture(autouse=True) + def _setup(self, make_args): + LOAD_PROBLEMS.reset() + self.failure_string = "FAILED: ADD" + self.add_method.side_effect = ValueError(self.failure_string) + + @pytest.mark.parametrize( + "attr_key", [None, Mixin.attr_key], ids=["w_o_attr", "w_attr"] + ) + def test_standard(self, attr_key): + result = self.call(attr_key=attr_key) + self.build_func.assert_called_once() + self.add_method.assert_called_once_with(self.build_func.return_value) + built = self.build_func.return_value + if attr_key is None: + expected_loaded = built + else: + expected_loaded = {attr_key: built} + + assert isinstance(result, LoadProblems.Problem) + assert result.filename == self.filename + assert result.loaded == expected_loaded + assert str(result.stack_trace) == 
self.failure_string + assert result is LOAD_PROBLEMS.problems[-1] + + def test_multiple_problems_same_file(self): + results = [self.call() for _ in range(3)] + for ix, problem in enumerate(LOAD_PROBLEMS.problems): + assert problem.filename == self.filename + assert problem is results[ix] + + def test_multiple_problems_diff_file(self): + names = [f"test__add_or_capture_{ix}.nc" for ix in range(3)] + results = [self.call(filename=name) for name in names] + problems_by_file = LOAD_PROBLEMS.problems_by_file + for ix, (problem_file, problems) in enumerate(problems_by_file.items()): + assert problem_file == names[ix] + for jx, problem in enumerate(problems): + assert problem is results[ix] + + +class TestSuccess(Mixin): + @pytest.fixture(autouse=True) + def _setup(self, make_args): + LOAD_PROBLEMS.reset() + + @pytest.mark.parametrize( + "attr_key", [None, Mixin.attr_key], ids=["w_o_attr", "w_attr"] + ) + def test(self, attr_key): + result = self.call(attr_key=attr_key) + self.build_func.assert_called_once() + self.add_method.assert_called_once_with(self.build_func.return_value) + assert LOAD_PROBLEMS.problems == [] + assert result is None diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_dimension_coordinate.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_and_add_dimension_coordinate.py similarity index 92% rename from lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_dimension_coordinate.py rename to lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_and_add_dimension_coordinate.py index 28d710d6b8..796dd17eca 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_dimension_coordinate.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_and_add_dimension_coordinate.py @@ -2,10 +2,7 @@ # # This file is part of Iris and is released under the BSD license. # See LICENSE in the root of the repository for full licensing details. 
-"""Test function :func:`iris.fileformats._nc_load_rules.helpers.\ -build_dimension_coordinate`. - -""" +"""Test function :func:`iris.fileformats._nc_load_rules.helpers.build_and_add_dimension_coordinate`.""" # import iris tests first so that some things can be initialised before # importing anything else @@ -19,7 +16,8 @@ from iris.coords import AuxCoord, DimCoord from iris.exceptions import CannotAddError -from iris.fileformats._nc_load_rules.helpers import build_dimension_coordinate +from iris.fileformats._nc_load_rules.helpers import build_and_add_dimension_coordinate +from iris.loading import LOAD_PROBLEMS def _make_bounds_var(bounds, dimensions, units): @@ -108,6 +106,7 @@ def _set_cf_coord_var(self, points): shape=points.shape, dtype=points.dtype, __getitem__=lambda self, key: points[key], + cf_attrs=lambda: [("foo", "a"), ("bar", "b")], ) def check_case_dim_coord_construction(self, climatology=False): @@ -127,10 +126,10 @@ def check_case_dim_coord_construction(self, climatology=False): # Asserts must lie within context manager because of deferred loading. with self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate(self.engine, self.cf_coord_var) + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) # Test that expected coord is built and added to cube. - self.engine.cube.add_dim_coord.assert_called_with(expected_coord, [0]) + self.engine.cube.add_dim_coord.assert_called_with(expected_coord, 0) def test_dim_coord_construction(self): self.check_case_dim_coord_construction(climatology=False) @@ -159,10 +158,10 @@ def test_dim_coord_construction_masked_array(self): # Asserts must lie within context manager because of deferred # loading. with self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate(self.engine, self.cf_coord_var) + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) # Test that expected coord is built and added to cube. 
- self.engine.cube.add_dim_coord.assert_called_with(expected_coord, [0]) + self.engine.cube.add_dim_coord.assert_called_with(expected_coord, 0) # Assert warning is raised assert len(w) == 1 @@ -188,10 +187,10 @@ def test_dim_coord_construction_masked_array_mask_does_nothing(self): # Asserts must lie within context manager because of deferred # loading. with self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate(self.engine, self.cf_coord_var) + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) # Test that expected coord is built and added to cube. - self.engine.cube.add_dim_coord.assert_called_with(expected_coord, [0]) + self.engine.cube.add_dim_coord.assert_called_with(expected_coord, 0) # Assert no warning is raised assert len(w) == 0 @@ -212,10 +211,10 @@ def test_dim_coord_construction_masked_bounds_mask_does_nothing(self): # Asserts must lie within context manager because of deferred # loading. with self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate(self.engine, self.cf_coord_var) + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) # Test that expected coord is built and added to cube. - self.engine.cube.add_dim_coord.assert_called_with(expected_coord, [0]) + self.engine.cube.add_dim_coord.assert_called_with(expected_coord, 0) # Assert no warning is raised assert len(w) == 0 @@ -233,17 +232,16 @@ def test_aux_coord_construction(self): bounds=self.bounds, ) - warning_patch = mock.patch("warnings.warn") - # Asserts must lie within context manager because of deferred loading. - with warning_patch, self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate(self.engine, self.cf_coord_var) + with self.deferred_load_patch, self.get_cf_bounds_var_patch: + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) # Test that expected coord is built and added to cube. 
self.engine.cube.add_aux_coord.assert_called_with(expected_coord, [0]) + load_problem = LOAD_PROBLEMS.problems[-1] self.assertIn( "creating 'wibble' auxiliary coordinate instead", - warnings.warn.call_args[0][0], + "".join(load_problem.stack_trace.format()), ) def test_dimcoord_not_added(self): @@ -258,9 +256,10 @@ def mock_add_dim_coord(_, __): self._set_cf_coord_var(np.arange(6)) with self.deferred_load_patch, self.get_cf_bounds_var_patch: - with pytest.warns(match="coordinate not added to Cube: foo"): - build_dimension_coordinate(self.engine, self.cf_coord_var) + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) + load_problem = LOAD_PROBLEMS.problems[-1] + assert load_problem.stack_trace.exc_type is CannotAddError assert self.engine.cube_parts["coordinates"] == [] def test_auxcoord_not_added(self): @@ -275,9 +274,10 @@ def mock_add_aux_coord(_, __): self._set_cf_coord_var(np.array([1, 3, 2, 4, 6, 5])) with self.deferred_load_patch, self.get_cf_bounds_var_patch: - with pytest.warns(match="coordinate not added to Cube: foo"): - build_dimension_coordinate(self.engine, self.cf_coord_var) + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) + load_problem = LOAD_PROBLEMS.problems[-1] + assert load_problem.stack_trace.exc_type is CannotAddError assert self.engine.cube_parts["coordinates"] == [] @@ -319,10 +319,10 @@ def test_slowest_varying_vertex_dim__normalise_bounds(self): # Asserts must lie within context manager because of deferred loading. with self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate(self.engine, self.cf_coord_var) + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) # Test that expected coord is built and added to cube. - self.engine.cube.add_dim_coord.assert_called_with(expected_coord, [0]) + self.engine.cube.add_dim_coord.assert_called_with(expected_coord, 0) # Test that engine.cube_parts container is correctly populated. 
expected_list = [(expected_coord, self.cf_coord_var.cf_name)] @@ -344,10 +344,10 @@ def test_fastest_varying_vertex_dim__normalise_bounds(self): # Asserts must lie within context manager because of deferred loading. with self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate(self.engine, self.cf_coord_var) + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) # Test that expected coord is built and added to cube. - self.engine.cube.add_dim_coord.assert_called_with(expected_coord, [0]) + self.engine.cube.add_dim_coord.assert_called_with(expected_coord, 0) # Test that engine.cube_parts container is correctly populated. expected_list = [(expected_coord, self.cf_coord_var.cf_name)] @@ -372,10 +372,10 @@ def test_fastest_with_different_dim_names__normalise_bounds(self): # Asserts must lie within context manager because of deferred loading. with self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate(self.engine, self.cf_coord_var) + build_and_add_dimension_coordinate(self.engine, self.cf_coord_var) # Test that expected coord is built and added to cube. - self.engine.cube.add_dim_coord.assert_called_with(expected_coord, [0]) + self.engine.cube.add_dim_coord.assert_called_with(expected_coord, 0) # Test that engine.cube_parts container is correctly populated. 
expected_list = [(expected_coord, self.cf_coord_var.cf_name)] @@ -414,7 +414,7 @@ def _check_circular(self, circular, *args, **kwargs): coord_name = "longitude" self._make_vars(*args, **kwargs) with self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate( + build_and_add_dimension_coordinate( self.engine, self.cf_coord_var, coord_name=coord_name ) self.assertEqual(self.engine.cube.add_dim_coord.call_count, 1) @@ -506,7 +506,7 @@ def _make_vars(self, bounds): def _assert_circular(self, value): with self.deferred_load_patch, self.get_cf_bounds_var_patch: - build_dimension_coordinate( + build_and_add_dimension_coordinate( self.engine, self.cf_coord_var, coord_name="longitude" ) self.assertEqual(self.engine.cube.add_aux_coord.call_count, 1) diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_and_add_names.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_and_add_names.py new file mode 100644 index 0000000000..927dc217c4 --- /dev/null +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_and_add_names.py @@ -0,0 +1,84 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Test function :func:`iris.fileformats._nc_load_rules.helpers.build_and_add_names`.""" + +# import iris tests first so that some things can be initialised before +# importing anything else +import iris.tests as tests # isort:skip + +from iris.fileformats._nc_load_rules.helpers import build_and_add_names +from iris.loading import LOAD_PROBLEMS + +from .test_build_cube_metadata import _make_engine + + +class TestCubeName(tests.IrisTest): + def setUp(self): + LOAD_PROBLEMS.reset() + + def check_cube_names(self, inputs, expected): + # Inputs - attributes on the fake CF Variable. + standard_name, long_name = inputs + # Expected - The expected cube attributes. 
+ exp_standard_name, exp_long_name = expected + + engine = _make_engine(standard_name=standard_name, long_name=long_name) + build_and_add_names(engine) + + # Check the cube's standard name and long name are as expected. + self.assertEqual(engine.cube.standard_name, exp_standard_name) + self.assertEqual(engine.cube.long_name, exp_long_name) + + def check_load_problems(self, invalid_standard_name=None): + if invalid_standard_name is None: + self.assertEqual(LOAD_PROBLEMS.problems, []) + else: + load_problem = LOAD_PROBLEMS.problems[-1] + self.assertEqual( + load_problem.loaded, {"standard_name": invalid_standard_name} + ) + + def test_standard_name_none_long_name_none(self): + inputs = (None, None) + expected = (None, None) + self.check_cube_names(inputs, expected) + self.check_load_problems() + + def test_standard_name_none_long_name_set(self): + inputs = (None, "ice_thickness_long_name") + expected = (None, "ice_thickness_long_name") + self.check_cube_names(inputs, expected) + self.check_load_problems() + + def test_standard_name_valid_long_name_none(self): + inputs = ("sea_ice_thickness", None) + expected = ("sea_ice_thickness", None) + self.check_cube_names(inputs, expected) + self.check_load_problems() + + def test_standard_name_valid_long_name_set(self): + inputs = ("sea_ice_thickness", "ice_thickness_long_name") + expected = ("sea_ice_thickness", "ice_thickness_long_name") + self.check_cube_names(inputs, expected) + self.check_load_problems() + + def test_standard_name_invalid_long_name_none(self): + inputs = ("not_a_standard_name", None) + expected = ( + None, + "not_a_standard_name", + ) + self.check_cube_names(inputs, expected) + self.check_load_problems("not_a_standard_name") + + def test_standard_name_invalid_long_name_set(self): + inputs = ("not_a_standard_name", "ice_thickness_long_name") + expected = (None, "ice_thickness_long_name") + self.check_cube_names(inputs, expected) + self.check_load_problems("not_a_standard_name") + + +if __name__ == 
"__main__": + tests.main() diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_cube_metadata.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_cube_metadata.py index 165dd97624..f11f78fa71 100644 --- a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_cube_metadata.py +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_cube_metadata.py @@ -35,7 +35,7 @@ def _make_engine(global_attributes=None, standard_name=None, long_name=None): cf_group=cf_group, ) - engine = mock.Mock(cube=Cube([23]), cf_var=cf_var) + engine = mock.Mock(cube=Cube([23]), cf_var=cf_var, filename="foo.nc") return engine @@ -73,53 +73,5 @@ def test_invalid(self): self.assertEqual(engine.cube.attributes.globals, expected) -class TestCubeName(tests.IrisTest): - def check_cube_names(self, inputs, expected): - # Inputs - attributes on the fake CF Variable. - standard_name, long_name = inputs - # Expected - The expected cube attributes. - exp_standard_name, exp_long_name = expected - - engine = _make_engine(standard_name=standard_name, long_name=long_name) - build_cube_metadata(engine) - - # Check the cube's standard name and long name are as expected. 
- self.assertEqual(engine.cube.standard_name, exp_standard_name) - self.assertEqual(engine.cube.long_name, exp_long_name) - - def test_standard_name_none_long_name_none(self): - inputs = (None, None) - expected = (None, None) - self.check_cube_names(inputs, expected) - - def test_standard_name_none_long_name_set(self): - inputs = (None, "ice_thickness_long_name") - expected = (None, "ice_thickness_long_name") - self.check_cube_names(inputs, expected) - - def test_standard_name_valid_long_name_none(self): - inputs = ("sea_ice_thickness", None) - expected = ("sea_ice_thickness", None) - self.check_cube_names(inputs, expected) - - def test_standard_name_valid_long_name_set(self): - inputs = ("sea_ice_thickness", "ice_thickness_long_name") - expected = ("sea_ice_thickness", "ice_thickness_long_name") - self.check_cube_names(inputs, expected) - - def test_standard_name_invalid_long_name_none(self): - inputs = ("not_a_standard_name", None) - expected = ( - None, - "not_a_standard_name", - ) - self.check_cube_names(inputs, expected) - - def test_standard_name_invalid_long_name_set(self): - inputs = ("not_a_standard_name", "ice_thickness_long_name") - expected = (None, "ice_thickness_long_name") - self.check_cube_names(inputs, expected) - - if __name__ == "__main__": tests.main() diff --git a/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_raw_cube.py b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_raw_cube.py new file mode 100644 index 0000000000..061d3800f6 --- /dev/null +++ b/lib/iris/tests/unit/fileformats/nc_load_rules/helpers/test_build_raw_cube.py @@ -0,0 +1,71 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Test function :func:`iris.fileformats._nc_load_rules.helpers.build_raw_cube`.""" + +import numpy as np +import pytest + +from iris.common import LimitedAttributeDict +from iris.cube import Cube +from iris.fileformats._nc_load_rules.helpers import build_raw_cube +from iris.fileformats.cf import CFVariable + + +def _make_array_and_cf_data(mocker, dim_lens: dict[str, int]): + shape = list(dim_lens.values()) + cf_data = mocker.MagicMock(_FillValue=None, spec=[]) + cf_data.chunking = mocker.MagicMock(return_value=shape) + data = np.arange(np.prod(shape), dtype=float) + data = data.reshape(shape) + return data, cf_data + + +def cf_attrs(): + return tuple( + [ + # standard_name is normally forbidden as a basic attribute - expect to + # see under IRIS_RAW. + ("standard_name", "air_temperature"), + ("my_attribute", "my_value"), + ] + ) + + +@pytest.fixture +def cf_variable(mocker): + dim_lens = {"foo": 3, "bar": 4} + data, cf_data = _make_array_and_cf_data(mocker, dim_lens) + + cf_var = mocker.MagicMock( + spec=CFVariable, + cf_name="wibble", + cf_attrs=cf_attrs, + # Minimum attributes to enable data getting. 
+ dimensions=list(dim_lens.keys()), + cf_data=cf_data, + shape=data.shape, + size=data.size, + dtype=data.dtype, + __getitem__=lambda self, key: data[key], + ) + + return cf_var + + +@pytest.fixture +def expected_cube(mocker, cf_variable): + dim_lens = {k: v for k, v in zip(cf_variable.dimensions, cf_variable.shape)} + expected_data, _ = _make_array_and_cf_data(mocker, dim_lens) + + raw_attributes = {k: v for k, v in cf_attrs()} + raw_attributes["var_name"] = cf_variable.cf_name + return Cube( + data=expected_data, attributes={LimitedAttributeDict.IRIS_RAW: raw_attributes} + ) + + +def test(cf_variable, expected_cube): + result = build_raw_cube(cf_variable, "foo.nc") + assert result == expected_cube diff --git a/lib/iris/tests/unit/loading/__init__.py b/lib/iris/tests/unit/loading/__init__.py new file mode 100644 index 0000000000..ed31d1bac0 --- /dev/null +++ b/lib/iris/tests/unit/loading/__init__.py @@ -0,0 +1,5 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Unit tests for the :mod:`iris.loading` module.""" diff --git a/lib/iris/tests/unit/loading/test_load_problems.py b/lib/iris/tests/unit/loading/test_load_problems.py new file mode 100644 index 0000000000..d5a9c5734a --- /dev/null +++ b/lib/iris/tests/unit/loading/test_load_problems.py @@ -0,0 +1,173 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. 
+"""Unit tests for the :class:`iris.loading.LoadProblems` class.""" + +from traceback import TracebackException + +import pytest + +from iris.coords import DimCoord +from iris.cube import Cube +from iris.loading import LoadProblems +from iris.warnings import IrisLoadWarning + + +@pytest.fixture +def error(): + return ValueError("Example ValueError") + + +@pytest.fixture +def stack_trace(error): + try: + raise error + except ValueError as raised_error: + return TracebackException.from_exception(raised_error) + + +@pytest.fixture( + params=[dict, Cube, DimCoord, None], + ids=["loaded_dict", "loaded_Cube", "loaded_DimCoord", "loaded_None"], +) +def loaded_object(request): + lookup = { + dict: {"long_name": "foo"}, + Cube: Cube([1.0], long_name="foo"), + DimCoord: DimCoord([1.0], long_name="foo"), + None: None, + } + return lookup[request.param] + + +@pytest.fixture +def first_filename(): + return "test.nc" + + +@pytest.fixture +def problem_instance(first_filename, loaded_object, stack_trace): + return LoadProblems.Problem( + filename=first_filename, + loaded=loaded_object, + stack_trace=stack_trace, + ) + + +@pytest.fixture +def load_problems_instance(problem_instance): + problem2 = LoadProblems.Problem( + filename="test2.nc", + loaded=problem_instance.loaded, + stack_trace=problem_instance.stack_trace, + ) + problem3 = LoadProblems.Problem( + filename=problem_instance.filename, + loaded=None, + stack_trace=problem_instance.stack_trace, + ) + result = LoadProblems() + result._problems = [problem_instance, problem2, problem3] + return result + + +def test_problem_str(problem_instance): + if isinstance(problem_instance.loaded, (Cube, DimCoord)): + expected_loaded = problem_instance.loaded.summary(shorten=True) + else: + expected_loaded = str(problem_instance.loaded) + + expected = ( + f'{problem_instance.filename}: "{problem_instance.stack_trace}", ' + f"{expected_loaded}" + ) + assert str(problem_instance) == expected + + +def 
test_load_problems_str(load_problems_instance): + expected_lines = [ + f"{repr(load_problems_instance)}:", + *[f" {problem}" for problem in load_problems_instance.problems], + ] + expected = "\n".join(expected_lines) + assert str(load_problems_instance) == expected + + +def test_problems_property(load_problems_instance): + assert load_problems_instance.problems == load_problems_instance._problems + + +def test_problems_by_file_property(load_problems_instance): + filenames = [p.filename for p in load_problems_instance._problems] + expected = dict.fromkeys(filenames) + for filename in filenames: + expected[filename] = [ + p for p in load_problems_instance._problems if p.filename == filename + ] + assert load_problems_instance.problems_by_file == expected + + +def test_record(load_problems_instance, loaded_object, error, stack_trace): + def check_equality(problem: LoadProblems.Problem, expected: LoadProblems.Problem): + assert problem.filename == expected.filename + assert problem.loaded == expected.loaded + assert str(problem.stack_trace) == str(expected.stack_trace) + + file_names = ["test3.nc", "test4.nc"] + + expected_additions = [ + LoadProblems.Problem( + filename=filename, + loaded=loaded_object, + stack_trace=stack_trace, + ) + for filename in file_names + ] + expected_problems = load_problems_instance._problems + expected_additions + + for ix, filename in enumerate(file_names): + result = load_problems_instance.record( + filename=filename, + loaded=loaded_object, + exception=error, + ) + check_equality(result, expected_additions[ix]) + + for ix, problem in enumerate(load_problems_instance._problems): + check_equality(problem, expected_problems[ix]) + + +def test_warning(load_problems_instance, loaded_object, error): + with pytest.warns( + expected_warning=IrisLoadWarning, + match="Not all file objects were parsed correctly.", + ): + load_problems_instance.record( + filename="test3.nc", + loaded=loaded_object, + exception=error, + ) + + +def 
test_reset(load_problems_instance): + assert load_problems_instance._problems != [] + load_problems_instance.reset() + assert load_problems_instance._problems == [] + + +def test_reset_with_filename(load_problems_instance, first_filename): + original_problems = [p for p in load_problems_instance._problems] + load_problems_instance.reset(first_filename) + + for problem in original_problems: + if problem.filename == first_filename: + assert problem not in load_problems_instance._problems + else: + assert problem in load_problems_instance._problems + + +def test_global_instance(): + from iris.loading import LOAD_PROBLEMS + + assert isinstance(LOAD_PROBLEMS, LoadProblems)