-
Notifications
You must be signed in to change notification settings - Fork 31
Add support for reading, defining and outputting CF-compliant projection info #38
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 28 commits
1683d16
aa87576
4826e20
b67998d
ba4caa7
9244fdf
8dd485a
a8eacc4
db737ab
44dff49
fdb984b
91ddfc2
ac0792c
d895094
ae35d39
05c3921
350cf92
18980b3
5da27df
0677415
967e9bd
b830f95
a584f91
ed7250a
a6005f6
a63502d
d4480c0
7e5c79c
ec75257
7310352
2246522
b4c67d7
60bda54
95946c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ | |
| from pathlib import Path | ||
|
|
||
| import numpy as np | ||
| import pyproj | ||
| import xarray as xr | ||
| from loguru import logger | ||
| from numcodecs import Blosc | ||
|
|
@@ -12,6 +13,11 @@ | |
| from .config import Config, InvalidConfigException | ||
| from .ops.loading import load_and_subset_dataset | ||
| from .ops.mapping import map_dims_and_variables | ||
| from .ops.projection import ( | ||
| ProjectionInconsistencyWarning, | ||
| get_projection_crs, | ||
| validate_projection_consistency, | ||
| ) | ||
| from .ops.selection import select_by_kwargs | ||
| from .ops.statistics import calc_stats | ||
|
|
||
|
|
@@ -122,13 +128,15 @@ def create_dataset(config: Config): | |
| output_coord_ranges = output_config.coord_ranges | ||
|
|
||
| dataarrays_by_target = defaultdict(list) | ||
| projections = [] | ||
|
|
||
| for dataset_name, input_config in config.inputs.items(): | ||
| path = input_config.path | ||
| variables = input_config.variables | ||
| target_output_var = input_config.target_output_variable | ||
| expected_input_attributes = input_config.attributes or {} | ||
| expected_input_var_dims = input_config.dims | ||
| dataset_projections = input_config.projections or {} | ||
|
|
||
| output_dims = output_config.variables[target_output_var] | ||
|
|
||
|
|
@@ -143,6 +151,10 @@ def create_dataset(config: Config): | |
| dataset_name=dataset_name, | ||
| ) | ||
|
|
||
| # TODO: remove this once grid_mapping is set in config file | ||
| for var in ds.data_vars: | ||
| ds[var].attrs["grid_mapping"] = "__common__" | ||
|
|
||
leifdenby marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| dim_mapping = input_config.dim_mapping | ||
|
|
||
| # check that there is an entry for each arch dimension | ||
|
|
@@ -174,6 +186,28 @@ def create_dataset(config: Config): | |
|
|
||
| da_target.attrs["source_dataset"] = dataset_name | ||
|
|
||
| # get the projection information from the dataset and update it with the projection | ||
| # information given in the input config | ||
| # TODO: read the projection information from the config | ||
| projection_crs = get_projection_crs(ds) | ||
| if projection_crs is None and dataset_projections: | ||
| logger.warning( | ||
| f"Projection information not found in dataset {dataset_name}, using projection information from config." | ||
| ) | ||
| projection_crs = { | ||
| crs_var: p.attributes for crs_var, p in dataset_projections.items() | ||
| } | ||
| elif projection_crs is None and not dataset_projections: | ||
| logger.warning( | ||
| f"No projection information found neither in dataset {dataset_name}, nor in config." | ||
| ) | ||
observingClouds marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| elif projection_crs is not None and dataset_projections: | ||
| logger.warning( | ||
| f"Projection information given in {dataset_name} is overwritten by that given in config." | ||
| ) | ||
| if projection_crs is not None: | ||
| projections.extend(projection_crs.values()) | ||
|
|
||
| # only need to do selection for the coordinates that the input dataset actually has | ||
| if output_coord_ranges is not None: | ||
| selection_kwargs = {} | ||
|
|
@@ -184,6 +218,18 @@ def create_dataset(config: Config): | |
|
|
||
| dataarrays_by_target[target_output_var].append(da_target) | ||
|
|
||
| # validate projections across input datasets | ||
| if projections: | ||
| try: | ||
| validate_projection_consistency(projections) | ||
| except ProjectionInconsistencyWarning as e: | ||
| logger.warning(f"Projection information might be ambiguous: {e}") | ||
| projection = pyproj.CRS.from_cf(projections[0]) | ||
|
|
||
| # TODO: generalize the retrieval of x and y coords | ||
| # coords = (dataarrays_by_target[target_output_var][0]['x'], dataarrays_by_target[target_output_var][0]['y']) | ||
| # lon, lat = get_latitude_longitude_from_projection(projection, coords) | ||
|
Comment on lines
+294
to
+296
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ealerskans this would be my current proposal for you on where and how you could generate the lats and lons from x and y. This is obviously per projection in each dataset and because we only accept one projection for now, you might just skip this step for all following datasets.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the function is available from
leifdenby marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| ds = _merge_dataarrays_by_target(dataarrays_by_target=dataarrays_by_target) | ||
|
|
||
| # need to drop the encoding so that we can write to zarr with new chunksizes | ||
|
|
@@ -230,6 +276,12 @@ def create_dataset(config: Config): | |
| ) | ||
| ds["splits"] = da_splits | ||
|
|
||
| # add the projection information to the dataset | ||
| if projections: | ||
| ds["__common__"] = xr.DataArray(0, attrs=projection.to_cf()).astype("int16") | ||
| # for var in ["VARIABLES_WITH_PROKJECTION"]: | ||
| # ds[var].attrs["grid_mapping"] = "__common__" | ||
|
|
||
| ds.attrs = {} | ||
| ds.attrs["schema_version"] = config.schema_version | ||
| ds.attrs["dataset_version"] = config.dataset_version | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.