Skip to content
Merged
Show file tree
Hide file tree
Changes from 59 commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
5b49ea1
Add gemini sites
alexdanjou Dec 9, 2025
c9bf6c7
First working version
alexdanjou Dec 15, 2025
c0ddd94
update to devel (#258)
alexdanjou Dec 15, 2025
5f2b58f
Upgraded version
alexdanjou Dec 16, 2025
b54cce9
Fix tests
alexdanjou Dec 17, 2025
469fb86
Fix test again
alexdanjou Dec 17, 2025
0c26f52
Enhancement
alexdanjou Dec 17, 2025
e0b1bec
Clean
alexdanjou Dec 17, 2025
b1d6801
Small enhancements
alexdanjou Dec 17, 2025
50b3e76
Change subplots config
alexdanjou Dec 17, 2025
7d51743
Update docstring
alexdanjou Dec 17, 2025
a940ff8
Add notebook cell for testing
melodb Feb 5, 2026
b34b3a6
Added option to separate plots by sites or models
moPeterAndrews Feb 6, 2026
0170e59
Merge branch 'devel' into add_mf_timeseries_for_multiple_siteswq
lionel42 Feb 16, 2026
8f4ef9e
Updates to reduce repeated code and fix formatting issues.
moPeterAndrews Feb 16, 2026
a28e7b4
Added some fixes for tests
moPeterAndrews Feb 17, 2026
f6f493c
Test fixes
moPeterAndrews Feb 18, 2026
41816fc
Apply suggestions from code review
lionel42 Feb 19, 2026
1a1c29c
Updates to formatting
moPeterAndrews Feb 19, 2026
657ff14
-Acted on suggestions from Lionel and Daniela
moPeterAndrews Feb 26, 2026
0f28f7e
Added default "None" option for uncertainty plotting, avoids errors
moPeterAndrews Feb 26, 2026
6214b6f
Some small formatting changes, fixed notebook changes
moPeterAndrews Feb 27, 2026
09c157d
Get default color is model_colors not available
melodb Feb 27, 2026
26bf805
Merge branch 'devel' of github.com:openghg/fluxy into add_mf_timeseri…
lionel42 Mar 2, 2026
d1e0fa5
simplify function
lionel42 Mar 2, 2026
92f3b1b
fixed types and formatting
lionel42 Mar 2, 2026
a7eb7b8
cleaning and formatting since dev update
lionel42 Mar 2, 2026
38a5b51
remove unused imports
lionel42 Mar 2, 2026
41cd72e
clean dataframe creation
lionel42 Mar 2, 2026
8198637
clean second dataset creation
lionel42 Mar 2, 2026
13e7873
cleaned another res table
lionel42 Mar 2, 2026
3e8db03
clean some code further
lionel42 Mar 2, 2026
86409d4
cleaning and correct usage of plot type
lionel42 Mar 2, 2026
1b05077
fix function signature
lionel42 Mar 2, 2026
b0b8857
clean minmax func
lionel42 Mar 2, 2026
e666e1d
clean function
lionel42 Mar 2, 2026
e447bff
clean new function
lionel42 Mar 2, 2026
d171840
can select the markers
lionel42 Mar 2, 2026
dbfb8a0
adding in example
lionel42 Mar 2, 2026
8461645
fix propagation
lionel42 Mar 2, 2026
e12761c
allow missing data in plot
lionel42 Mar 2, 2026
7acb96c
revert meta
lionel42 Mar 2, 2026
dbf8fba
adding test with only data
lionel42 Mar 2, 2026
7540410
fix attrs update
lionel42 Mar 2, 2026
a0082e7
duplicate import
lionel42 Mar 2, 2026
449c8b2
remove useless line
lionel42 Mar 2, 2026
64f9c33
improve if statement
lionel42 Mar 2, 2026
c2ebeaa
ensure variable is not unbound
lionel42 Mar 2, 2026
db9d3ad
type hints return
lionel42 Mar 2, 2026
6110a81
Update scripts/example_basics.ipynb
lionel42 Mar 2, 2026
f29ddcd
fix dataset to da and signatures
lionel42 Mar 2, 2026
04790a1
slice sites correctly on multiple models and sites
lionel42 Mar 2, 2026
c8a27e5
bug fix for tests
lionel42 Mar 2, 2026
ed702b0
clarify boolean mask
lionel42 Mar 2, 2026
f2b72c9
improve slice sites
lionel42 Mar 2, 2026
00738ba
revert mistake
lionel42 Mar 2, 2026
a64b379
fix fill between
lionel42 Mar 2, 2026
9799c66
Merge remote-tracking branch 'refs/remotes/origin/add_mf_timeseries_f…
lionel42 Mar 2, 2026
a95b9e5
use variable with all sites
lionel42 Mar 2, 2026
7bd6f42
rename to mix
lionel42 Mar 4, 2026
02e07be
Fixed uncertainty plotting
moPeterAndrews Mar 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 7 additions & 15 deletions fluxy/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,15 +485,6 @@ def read_model_output(
if add_sites_to_flux and file_type == DataTypes.FLUX:
ds_all[m] = add_sites_var(ds_all[m], filepath, m, period[i], config_data)

# Overwrite species attributes
current_species = ds_all[m].attrs.get("species", "not set")
ds_all[m].attrs["species"] = current_species
if species is not None and current_species != species:
logger.info(
f"'species' attribute in dataset {m} ({current_species}) differs from species {species}. It is overwritten."
)
ds_all[m].attrs["species"] = species

return ds_all


Expand Down Expand Up @@ -788,9 +779,10 @@ def edit_vars_and_attributes(
xarray dataset with updated variables and attributes.
"""

# Add inversion frequency to global attributes
# Add inversion frequency and exp name to global attributes
if "frequency" not in ds.attrs:
ds.attrs["frequency"] = frequency
ds.attrs["exp_name"] = model

# Rename legacy variables
name_dict = {
Expand All @@ -807,14 +799,14 @@ def edit_vars_and_attributes(
filename_tags = os.path.basename(model)
m0 = filename_tags.split("_")[0].lower()

# check the species
# check (and overwrite) species attribute
if species is not None:
if "species" not in ds.attrs:
ds.attrs["species"] = species
elif ds.attrs["species"] != species:
current_species = ds.attrs.get("species", "not set")
if current_species != species:
logger.info(
f"Species {ds.attrs['species']} in dataset does not match species {species} in model {model}."
f"'species' attribute in dataset {model} ({current_species}) differs from species {species}. It is overwritten."
)
ds.attrs["species"] = species

file_type = DataTypes(file_type)

Expand Down
16 changes: 15 additions & 1 deletion fluxy/operators/mf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import xarray as xr
import pandas as pd
import logging
from fluxy.operators.select import get_unique_sites, get_site_index
from fluxy.operators.convert import get_variables
from typing import Literal

Expand Down Expand Up @@ -74,6 +73,19 @@ def compute_mf_difference(
unique_platforms, platform_indices = np.unique(
common_platforms, return_inverse=True
)

# check species
species_set = {ds_left.attrs["species"], ds_right.attrs["species"]}
if len(species_set) != 1:
logger.warning(
f"Different species found {species_set} between the 2 compared datasets."
)
species = species_set
species_set = "mix"
else:
species = list(species_set)[0]

# create dataset with coord and dim
ds_diff[key_name] = xr.Dataset(
coords={
"time": ("index", common_index.get_level_values("time").values),
Expand All @@ -82,6 +94,8 @@ def compute_mf_difference(
},
attrs={
"description": f"Difference between {model_left} and {model_right}",
"exp_name": f"{ds_left.attrs['exp_name']} - {ds_right.attrs['exp_name']}",
"species": species,
},
)

Expand Down
88 changes: 75 additions & 13 deletions fluxy/operators/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def slice_flux(

"""
ds_all_sliced = dict()
species_info = config_data.get("species_info",{}).get(species, None)
species_info = config_data.get("species_info", {}).get(species, None)

if type(start_date) is str:
start_date = [start_date] * len(ds_all.keys())
Expand Down Expand Up @@ -94,7 +94,7 @@ def slice_mf(
ds_all: dict[str, xr.Dataset],
start_date: str = None,
end_date: str = None,
site: str = None,
site: str | list[str] | None = None,
baseline_site: str = None,
baseline_filename: str = "InTEM_baseline_timestamps",
data_dir: os.PathLike | None = None,
Expand All @@ -114,7 +114,7 @@ def slice_mf(
end_date (str):
Date to slice data to, e.g. '2022-01-01' would include all
data up to 2021-12-31.
site (str):
site (str | list[str] | None):
Obs site to select data from, e.g. 'MHD'.
baseline_site (str):
Site used to define baseline at, options for 'MHD', 'JFJ', or 'CMN'.
Expand Down Expand Up @@ -225,29 +225,69 @@ def slice_mf(
return ds_all


def slice_site(ds: xr.Dataset, site: str) -> xr.Dataset:
def slice_site(
ds: xr.Dataset | dict[str, xr.Dataset],
site: str | list[str],
raise_error: bool = True,
) -> xr.Dataset | dict[str, xr.Dataset] | None:
"""
Slices the dataset to only include data for a given site.

Args:
ds (xarray dataset):
Dataset with mf data of a given model.
site (str):
Site of interest.
Can also be a dictionary of datasets, in which case the function
is applied to each dataset and return a dictionary of sliced datasets.
site (str | list[str]):
Site(s) of interest.
raise_error:
if True, raise an error if the site is not found in the dataset.
Returns:
ds (xarray dataset):
Dataset with mf data of a given model, sliced to only include data for the given site.
Dataset with mf data of a given model, sliced to only include data for the given site(s).
"""

site_index = get_site_index(ds, site)
if isinstance(ds, dict):
ds_all_site = dict()
for m, ds_this in ds.items():
logger.info(f"Slicing site {site} from {m}.")

if site_index is None:
raise ValueError(f"Site {site} not found in dataset.")
ds_sliced = slice_site(ds_this, site, raise_error=raise_error)
if ds_sliced is None:
logger.warning(
f"Site {site} not found in dataset for {m}. "
f"Continuing without {m} - {site}."
)
continue

mask = ds["number_of_identifier"] == site_index
ds = ds.where(mask, drop=True)
ds_all_site[m] = ds_sliced

return ds
return ds_all_site

if isinstance(site, str):
sites = [site]
else:
sites = site

site_indices = []
for site in sites:
site_index = get_site_index(ds, site)
if site_index is not None:
site_indices.append(site_index)
else:
logger.warning(f"Site {site} not found for model {ds.attrs['exp_name']}.")

mask = ds["number_of_identifier"].isin(site_indices)
if mask.any():
ds = ds.where(mask, drop=True)
return ds

msg = f"No data for any sites {sites} with indices {site_indices} in model {ds.attrs['exp_name']}."
if raise_error:
raise ValueError(msg)
else:
logger.warning(msg)
return None


def slice_height(ds: xr.Dataset, intake_height: float) -> xr.Dataset:
Expand Down Expand Up @@ -496,3 +536,25 @@ def clean_timeseries_missing_data(
ds = ds.sortby("time")

return ds


def check_site_list(
    site_list: list[str] | None, ds_all: dict[str, xr.Dataset]
) -> list[str]:
    """
    Validate a list of site codes against the sites present in the datasets.

    Args:
        site_list: sites to validate, or None to request every available site.
        ds_all: mapping of model name to dataset, searched for available sites.

    Returns:
        The validated site list, or all available sites when site_list is None.

    Raises:
        ValueError: if any requested site is absent from the datasets.
    """
    # Resolve the full set of sites present across all datasets.
    available_sites = get_unique_sites(ds_all)
    if site_list is None:
        return available_sites
    # Fail fast on the first site that is not present anywhere.
    for requested in site_list:
        if requested not in available_sites:
            raise ValueError(
                f"Site {requested} not found in the datasets provided. Available sites are {available_sites}."
            )
    return site_list
14 changes: 3 additions & 11 deletions fluxy/operators/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import pandas as pd
import xarray as xr
from fluxy.operators.select import get_unique_sites, get_site_index
from fluxy.operators.select import get_unique_sites, slice_site


def stats_observed_vs_simulated(
Expand Down Expand Up @@ -77,17 +77,9 @@ def stats_observed_vs_simulated(
# Compute stats for all sites and all models
for site in sites_all:
for model, ds in ds_all.items():
site_index = get_site_index(ds, site)
if site_index is None:
logger.warning(f"Site {site} not found in model {model}.")
ds_site = slice_site(ds, site, raise_error=False)
if ds_site is None:
continue
mask_site = ds["number_of_identifier"] == site_index
if not mask_site.any():
logger.warning(
f"No data for site {site} with index {site_index} in model {model}."
)
continue
ds_site = ds.where(mask_site, drop=True)

# select what to compare
obs = ds_site[obs_var]
Expand Down
Loading