Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
5b49ea1
Add gemini sites
alexdanjou Dec 9, 2025
c9bf6c7
First working version
alexdanjou Dec 15, 2025
c0ddd94
update to devel (#258)
alexdanjou Dec 15, 2025
5f2b58f
Upgraded version
alexdanjou Dec 16, 2025
b54cce9
Fix tests
alexdanjou Dec 17, 2025
469fb86
Fix test again
alexdanjou Dec 17, 2025
0c26f52
Enhancement
alexdanjou Dec 17, 2025
e0b1bec
Clean
alexdanjou Dec 17, 2025
b1d6801
Small enhancements
alexdanjou Dec 17, 2025
50b3e76
Change subplots config
alexdanjou Dec 17, 2025
7d51743
Update docstring
alexdanjou Dec 17, 2025
a940ff8
Add notebook cell for testing
melodb Feb 5, 2026
b34b3a6
Added option to separate plots by sites or models
moPeterAndrews Feb 6, 2026
0170e59
Merge branch 'devel' into add_mf_timeseries_for_multiple_siteswq
lionel42 Feb 16, 2026
8f4ef9e
Updates to reduce repeated code and fix formatting issues.
moPeterAndrews Feb 16, 2026
a28e7b4
Added some fixes for tests
moPeterAndrews Feb 17, 2026
f6f493c
Test fixes
moPeterAndrews Feb 18, 2026
41816fc
Apply suggestions from code review
lionel42 Feb 19, 2026
1a1c29c
Updates to formatting
moPeterAndrews Feb 19, 2026
657ff14
-Acted on suggestions from Lionel and Daniela
moPeterAndrews Feb 26, 2026
0f28f7e
Added default "None" option for uncertainty plotting, avoids errors
moPeterAndrews Feb 26, 2026
6214b6f
Some small formatting changes, fixed notebook changes
moPeterAndrews Feb 27, 2026
09c157d
Get default color is model_colors not available
melodb Feb 27, 2026
26bf805
Merge branch 'devel' of github.com:openghg/fluxy into add_mf_timeseri…
lionel42 Mar 2, 2026
d1e0fa5
simplify function
lionel42 Mar 2, 2026
92f3b1b
fixed types and formatting
lionel42 Mar 2, 2026
a7eb7b8
cleaning and formatting since dev update
lionel42 Mar 2, 2026
38a5b51
remove unused imports
lionel42 Mar 2, 2026
41cd72e
clean dataframe creation
lionel42 Mar 2, 2026
8198637
clean second dataset creation
lionel42 Mar 2, 2026
13e7873
cleaned another res table
lionel42 Mar 2, 2026
3e8db03
clean some code further
lionel42 Mar 2, 2026
86409d4
cleaning and correct usage of plot type
lionel42 Mar 2, 2026
1b05077
fix function signature
lionel42 Mar 2, 2026
b0b8857
clean minmax func
lionel42 Mar 2, 2026
e666e1d
clean function
lionel42 Mar 2, 2026
e447bff
clean new function
lionel42 Mar 2, 2026
d171840
can select the markers
lionel42 Mar 2, 2026
dbfb8a0
adding in example
lionel42 Mar 2, 2026
8461645
fix propagation
lionel42 Mar 2, 2026
e12761c
allow missing data in plot
lionel42 Mar 2, 2026
7acb96c
revert meta
lionel42 Mar 2, 2026
dbf8fba
adding test with only data
lionel42 Mar 2, 2026
7540410
fix attrs update
lionel42 Mar 2, 2026
a0082e7
duplicate import
lionel42 Mar 2, 2026
449c8b2
remove useless line
lionel42 Mar 2, 2026
64f9c33
improve if statement
lionel42 Mar 2, 2026
c2ebeaa
ensure variable is not unbound
lionel42 Mar 2, 2026
db9d3ad
type hints return
lionel42 Mar 2, 2026
6110a81
Update scripts/example_basics.ipynb
lionel42 Mar 2, 2026
f29ddcd
fix dataset to da and signatures
lionel42 Mar 2, 2026
04790a1
slice sites correctly on multiptle models and sites
lionel42 Mar 2, 2026
c8a27e5
bug fix for tests
lionel42 Mar 2, 2026
ed702b0
clarify boolean mask
lionel42 Mar 2, 2026
f2b72c9
improve slice sites
lionel42 Mar 2, 2026
00738ba
revert mistake
lionel42 Mar 2, 2026
a64b379
fix fill between
lionel42 Mar 2, 2026
9799c66
Merge remote-tracking branch 'refs/remotes/origin/add_mf_timeseries_f…
lionel42 Mar 2, 2026
a95b9e5
use variable with all sites
lionel42 Mar 2, 2026
7bd6f42
rename to mix
lionel42 Mar 4, 2026
02e07be
Fixed uncertainty plotting
moPeterAndrews Mar 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions configs/site_info.json
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,83 @@
"longitude": -0.30359
}
},
"GLE": {
"GEMINI": {
"latitude": 52.656,
"longitude": -1.13,
"height_station_masl": 54.0,
"long_name": "Space Park Leicester, Leicester, UK",
"height": ["column"],
"height_name": ["column"]
}
},

"GGU": {
"GEMINI": {
"latitude": 49.46,
"longitude": -2.54,
"height_station_masl": 47.0,
"long_name": "Elizabeth College, St Peters Port, Guernsey",
"height": ["column"],
"height_name": ["column"]
}
},

"GHA": {
"GEMINI": {
"latitude": 51.572,
"longitude": -1.315,
"height_station_masl": 142.0,
"long_name": "Rutherford Appleton Laboratory, Harwell, Oxfordshire, UK",
"height": ["column"],
"height_name": ["column"]
}
},

"GWE": {
"GEMINI": {
"latitude": 52.951,
"longitude": 1.123,
"height_station_masl": 18.0,
"long_name": "Weybourne Atmospheric Observatory, Norfolk, UK",
"height": ["column"],
"height_name": ["column"]
}
},

"GBI": {
"GEMINI": {
"latitude": 52.45,
"longitude": -1.93,
"height_station_masl": 145.0,
"long_name": "University of Birmingham, Birmingham, UK",
"height": ["column"],
"height_name": ["column"]
}
},

"GCA": {
"GEMINI": {
"latitude": 51.486,
"longitude": -3.183,
"height_station_masl": 23.0,
"long_name": "Cardiff University, Cardiff, UK",
"height": ["column"],
"height_name": ["column"]
}
},

"GNE": {
"GEMINI": {
"latitude": 54.98,
"longitude": -1.61,
"height_station_masl": 59.0,
"long_name": "Northumbria University, Newcastle, UK",
"height": ["column"],
"height_name": ["column"]
}
},

"GMI": {
"NOAA": {
"height_station_masl": 0.0,
Expand Down
22 changes: 7 additions & 15 deletions fluxy/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,15 +482,6 @@ def read_model_output(
if add_sites_to_flux and file_type == DataTypes.FLUX:
ds_all[m] = add_sites_var(ds_all[m], filepath, m, period[i], config_data)

# Overwrite species attributes
current_species = ds_all[m].attrs.get("species", "not set")
ds_all[m].attrs["species"] = current_species
if species is not None and current_species != species:
logger.info(
f"'species' attribute in dataset {m} ({current_species}) differs from species {species}. It is overwritten."
)
ds_all[m].attrs["species"] = species

return ds_all


Expand Down Expand Up @@ -780,9 +771,10 @@ def edit_vars_and_attributes(
xarray dataset with updated variables and attributes.
"""

# Add inversion frequency to global attributes
# Add inversion frequency and exp name to global attributes
if "frequency" not in ds.attrs:
ds.attrs["frequency"] = frequency
ds.attrs["exp_name"] = model

# Rename legacy variables
name_dict = {
Expand All @@ -799,14 +791,14 @@ def edit_vars_and_attributes(
filename_tags = os.path.basename(model)
m0 = filename_tags.split("_")[0].lower()

# check the species
# check (and overwrite) species attribute
if species is not None:
if "species" not in ds.attrs:
ds.attrs["species"] = species
elif ds.attrs["species"] != species:
current_species = ds.attrs.get("species", "not set")
if current_species != species:
logger.info(
f"Species {ds.attrs['species']} in dataset does not match species {species} in model {model}."
f"'species' attribute in dataset {model} ({current_species}) differs from species {species}. It is overwritten."
)
ds.attrs["species"] = species

file_type = DataTypes(file_type)

Expand Down
15 changes: 15 additions & 0 deletions fluxy/operators/mf.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ def compute_mf_difference(
unique_platforms, platform_indices = np.unique(
common_platforms, return_inverse=True
)

# Check that both compared datasets refer to the same species.
# The set must be built from the left AND the right dataset; comparing
# ds_left with itself can never detect a mismatch.
# NOTE(review): assumes the right-hand dataset is bound as `ds_right` in the
# part of this function not shown here - confirm.
species_set = {ds_left.attrs["species"], ds_right.attrs["species"]}
if len(species_set) != 1:
    logger.warning(
        f"Different species found {species_set} between the 2 compared datasets."
    )
    # Bind `species` (not `species_set`) so the attrs of the difference
    # dataset always have a value to use.
    species = "mix"
else:
    species = next(iter(species_set))

# create dataset with coord and dim
ds_diff[key_name] = xr.Dataset(
coords={
"time": ("index", common_index.get_level_values("time").values),
Expand All @@ -82,6 +92,8 @@ def compute_mf_difference(
},
attrs={
    "description": f"Difference between {model_left} and {model_right}",
    # Name both experiments: the right-hand side must come from the right
    # dataset, otherwise the label reads "<left> - <left>".
    # NOTE(review): assumes the right-hand dataset is bound as `ds_right`
    # earlier in this function - confirm.
    "exp_name": f"{ds_left.attrs['exp_name']} - {ds_right.attrs['exp_name']}",
    "species": species,
},
)

Expand Down Expand Up @@ -114,6 +126,7 @@ def stats_mf(
stats_type: Literal[
"prior", "posterior", "prior_above_BC", "posterior_above_BC"
] = "prior",
sites: list = None,
) -> pd.DataFrame:
"""
Calculates multiple statistical measures of the fit between the posterior
Expand All @@ -130,6 +143,7 @@ def stats_mf(
statistics on the absolute mole fractions and 'prior_above_BC',
'posterior_above_BC' for regional part of mole fraction, i.e. with
BC contribution subtracted from both observation and simulation.
sites: sites for which to make the stats.
Returns:
stats (pandas.DataFrame):
Dataframe containing the statistical measures.
Expand Down Expand Up @@ -174,4 +188,5 @@ def stats_mf(
ds_all,
obs_var=obs,
sim_var=sim,
sites=sites,
)
68 changes: 58 additions & 10 deletions fluxy/operators/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def slice_mf(
ds_all: dict[str, xr.Dataset],
start_date: str = None,
end_date: str = None,
site: str = None,
site: str | list [str] = None,
baseline_site: str = None,
baseline_filename: str = "InTEM_baseline_timestamps",
data_dir: os.PathLike | None = None,
Expand Down Expand Up @@ -225,7 +225,7 @@ def slice_mf(
return ds_all


def slice_site(ds: xr.Dataset, site: str) -> xr.Dataset:
def slice_site(ds: xr.Dataset, sites: str | list[str], raise_error: bool = True) -> xr.Dataset:
"""
Slices the dataset to only include data for a given site.

Expand All @@ -234,20 +234,37 @@ def slice_site(ds: xr.Dataset, site: str) -> xr.Dataset:
Dataset with mf data of a given model.
sites (str | list[str]):
Site or list of sites of interest.
raise_error:
if True, raise an error if the site is not found in the dataset.
Returns:
ds (xarray dataset):
Dataset with mf data of a given model, sliced to only include data for the given site.
"""
if not isinstance(sites, list):
sites = [sites,]

site_index = get_site_index(ds, site)
mask = ds["number_of_identifier"]*False
for site in sites:
site_index = get_site_index(ds, site)

if site_index is None:
raise ValueError(f"Site {site} not found in dataset.")

mask = ds["number_of_identifier"] == site_index
ds = ds.where(mask, drop=True)

return ds
if site_index is not None:
mask += ds["number_of_identifier"] == site_index
else:
msg = f"Site {site} not found for model {ds.attrs['exp_name']}."
if raise_error:
raise ValueError(msg)
else:
logger.warning(msg)

if mask.any():
ds = ds.where(mask, drop=True)
return ds
else:
msg = f"No data for site {site} with index {site_index} in model {ds.attrs['exp_name']}."
if raise_error:
raise ValueError(msg)
else:
logger.warning(msg)


def slice_height(ds: xr.Dataset, intake_height: float) -> xr.Dataset:
Expand Down Expand Up @@ -496,3 +513,34 @@ def clean_timeseries_missing_data(
ds = ds.sortby("time")

return ds

def slice_site_dict_of_datasets(
    ds_all: dict[str, xr.Dataset],
    site: str,
) -> dict[str, xr.Dataset]:
    """
    Restrict every dataset of a dictionary to a single site.

    Datasets whose ``platform`` values do not contain the site are left out
    of the result, and a warning is logged for each of them.

    Args:
        ds_all (dictionary of datasets):
            xarray datasets, one per dictionary key; each is looked up
            through its ``platform`` variable.
        site (str):
            Site of interest.
    Returns:
        sliced (dictionary of datasets):
            Datasets sliced with ``slice_site``, only for the keys in which
            the site was found.
    """
    sliced = {}

    for m, dataset in ds_all.items():
        logger.info(f"Slicing site {site} from {m}.")

        # Guard clause: skip entries that do not list the site at all.
        if site not in dataset["platform"].values:
            logger.warning(
                f"Site {site} not found in dataset for {m}. "
                f"Continuing without {m} - {site}."
            )
            continue

        sliced[m] = slice_site(dataset, site)

    return sliced
22 changes: 11 additions & 11 deletions fluxy/operators/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
import numpy as np
import pandas as pd
import xarray as xr
from fluxy.operators.select import get_unique_sites, get_site_index
from fluxy.operators.select import get_unique_sites, slice_site


def stats_observed_vs_simulated(
ds_all: dict[str, dict],
obs_var: str,
sim_var: str,
sites: list = None,
) -> pd.DataFrame:
"""
Calculates multiple statistical measures of the fit between the observed
Expand All @@ -29,6 +30,8 @@ def stats_observed_vs_simulated(
Name of the observed variable.
sim_var (str):
Name of the simulated variable.
sites (list):
Sites for which to make the stats.

Returns:
stats (pandas.DataFrame):
Expand Down Expand Up @@ -62,24 +65,21 @@ def stats_observed_vs_simulated(

# names of sites
sites_all = get_unique_sites(ds_all)
if sites:
sites_all = [site for site in sites if site in sites_all]
sites_missing = [site for site in sites if site not in sites_all]
if sites_missing:
logger.warning(f"Sites {sites_missing} are not present in datasets.")

# init empty list to hold results for individual sites
stats = []

# Compute stats for all sites and all models
for site in sites_all:
for model, ds in ds_all.items():
site_index = get_site_index(ds, site)
if site_index is None:
logger.warning(f"Site {site} not found in model {model}.")
ds_site = slice_site(ds, site, raise_error=False)
if not ds_site:
continue
mask_site = ds["number_of_identifier"] == site_index
if not mask_site.any():
logger.warning(
f"No data for site {site} with index {site_index} in model {model}."
)
continue
ds_site = ds.where(mask_site, drop=True)

# select what to compare
obs = ds_site[obs_var]
Expand Down
Loading
Loading