openghg · lionel42 · Mar 9, 2026 · Dec 9, 2025 · Dec 15, 2025 · Dec 15, 2025
diff --git a/configs/site_info.json b/configs/site_info.json
@@ -1162,6 +1162,83 @@
       "longitude": -0.30359
     }
   },
+  "GLE": {
+    "GEMINI": {
+      "latitude": 52.656,
+      "longitude": -1.13,
+      "height_station_masl": 54.0,
+      "long_name": "Space Park Leicester, Leicester, UK",
+      "height": ["column"],
+      "height_name": ["column"]
+    }
+  },
+
+  "GGU": {
+    "GEMINI": {
+      "latitude": 49.46,
+      "longitude": -2.54,
+      "height_station_masl": 47.0,
+      "long_name": "Elizabeth College, St Peter Port, Guernsey",
+      "height": ["column"],
+      "height_name": ["column"]
+    }
+  },
+
+  "GHA": {
+    "GEMINI": {
+      "latitude": 51.572,
+      "longitude": -1.315,
+      "height_station_masl": 142.0,
+      "long_name": "Rutherford Appleton Laboratory, Harwell, Oxfordshire, UK",
+      "height": ["column"],
+      "height_name": ["column"]
+    }
+  },
+
+  "GWE": {
+    "GEMINI": {
+      "latitude": 52.951,
+      "longitude": 1.123,
+      "height_station_masl": 18.0,
+      "long_name": "Weybourne Atmospheric Observatory, Norfolk, UK",
+      "height": ["column"],
+      "height_name": ["column"]
+    }
+  },
+
+  "GBI": {
+    "GEMINI": {
+      "latitude": 52.45,
+      "longitude": -1.93,
+      "height_station_masl": 145.0,
+      "long_name": "University of Birmingham, Birmingham, UK",
+      "height": ["column"],
+      "height_name": ["column"]
+    }
+  },
+
+  "GCA": {
+    "GEMINI": {
+      "latitude": 51.486,
+      "longitude": -3.183,
+      "height_station_masl": 23.0,
+      "long_name": "Cardiff University, Cardiff, UK",
+      "height": ["column"],
+      "height_name": ["column"]
+    }
+  },
+
+  "GNE": {
+    "GEMINI": {
+      "latitude": 54.98,
+      "longitude": -1.61,
+      "height_station_masl": 59.0,
+      "long_name": "Northumbria University, Newcastle, UK",
+      "height": ["column"],
+      "height_name": ["column"]
+    }
+  },
+
   "GMI": {
     "NOAA": {
       "height_station_masl": 0.0,

diff --git a/fluxy/io.py b/fluxy/io.py
@@ -482,15 +482,6 @@ def read_model_output(
         if add_sites_to_flux and file_type == DataTypes.FLUX:
             ds_all[m] = add_sites_var(ds_all[m], filepath, m, period[i], config_data)
 
-        # Overwrite species attributes
-        current_species = ds_all[m].attrs.get("species", "not set")
-        ds_all[m].attrs["species"] = current_species
-        if species is not None and current_species != species:
-            logger.info(
-                f"'species' attribute in dataset {m} ({current_species}) differs from species {species}. It is overwritten."
-            )
-            ds_all[m].attrs["species"] = species
-
     return ds_all
 
 
@@ -780,9 +771,10 @@ def edit_vars_and_attributes(
             xarray dataset with updated variables and attributes.
     """
 
-    # Add inversion frequency to global attributes
+    # Add inversion frequency and exp name to global attributes
     if "frequency" not in ds.attrs:
         ds.attrs["frequency"] = frequency
+    ds.attrs["exp_name"] = model
 
     # Rename legacy variables
     name_dict = {
@@ -799,14 +791,14 @@ def edit_vars_and_attributes(
     filename_tags = os.path.basename(model)
     m0 = filename_tags.split("_")[0].lower()
 
-    # check the species
+    # check (and overwrite) species attribute
     if species is not None:
-        if "species" not in ds.attrs:
-            ds.attrs["species"] = species
-        elif ds.attrs["species"] != species:
+        current_species = ds.attrs.get("species", "not set")
+        if current_species != species:
             logger.info(
-                f"Species {ds.attrs['species']} in dataset does not match species {species} in model {model}."
+                f"'species' attribute in dataset {model} ({current_species}) differs from species {species}. It is overwritten."
             )
+            ds.attrs["species"] = species
 
     file_type = DataTypes(file_type)
 

diff --git a/fluxy/operators/mf.py b/fluxy/operators/mf.py
@@ -74,6 +74,16 @@ def compute_mf_difference(
     unique_platforms, platform_indices = np.unique(
         common_platforms, return_inverse=True
     )
+
+    # Check that both datasets describe the same species ("not set" when the attribute is missing).
+    species_set = {_ds.attrs.get("species", "not set") for _ds in (ds_left, ds_right)}
+    if len(species_set) != 1:
+        logger.warning(f"Different species found {species_set} between the 2 compared datasets.")
+        # The difference mixes two species; flag it instead of picking one arbitrarily.
+        species = "mix"
+    else:
+        species = next(iter(species_set))
+    # create dataset with coord and dim
     ds_diff[key_name] = xr.Dataset(
         coords={
             "time": ("index", common_index.get_level_values("time").values),
@@ -82,6 +92,8 @@ def compute_mf_difference(
         },
         attrs={
             "description": f"Difference between {model_left} and {model_right}",
+            "exp_name": f"{ds_left.attrs.get('exp_name', model_left)} - {ds_right.attrs.get('exp_name', model_right)}",
+            "species": species,
         },
     )
 
@@ -114,6 +126,7 @@ def stats_mf(
     stats_type: Literal[
         "prior", "posterior", "prior_above_BC", "posterior_above_BC"
     ] = "prior",
+    sites: list = None,
 ) -> pd.DataFrame:
     """
     Calculates multiple statistical measures of the fit between the posterior
@@ -130,6 +143,7 @@ def stats_mf(
             statistics on the absolute mole fractions and 'prior_above_BC',
             'posterior_above_BC' for regional part of mole fraction, i.e. with
             BC contribution subtracted from both observation and simulation.
+        sites (list): Sites for which to compute the stats; passed on to stats_observed_vs_simulated.
     Returns:
         stats (pandas.DataFrame):
             Dataframe containing the statistical measures.
@@ -174,4 +188,5 @@ def stats_mf(
         ds_all,
         obs_var=obs,
         sim_var=sim,
+        sites=sites,
     )
diff --git a/fluxy/operators/select.py b/fluxy/operators/select.py
@@ -94,7 +94,7 @@ def slice_mf(
     ds_all: dict[str, xr.Dataset],
     start_date: str = None,
     end_date: str = None,
-    site: str = None,
+    site: str | list[str] = None,
     baseline_site: str = None,
     baseline_filename: str = "InTEM_baseline_timestamps",
     data_dir: os.PathLike | None = None,
@@ -225,7 +225,7 @@ def slice_mf(
     return ds_all
 
 
-def slice_site(ds: xr.Dataset, site: str) -> xr.Dataset:
+def slice_site(ds: xr.Dataset, sites: str | list[str], raise_error: bool = True) -> xr.Dataset | None:
     """
     Slices the dataset to only include data for a given site.
 
@@ -234,20 +234,37 @@ def slice_site(ds: xr.Dataset, site: str) -> xr.Dataset:
             Dataset with mf data of a given model.
-        site (str):
-            Site of interest.
+        sites (str or list of str):
+            Site, or list of sites, of interest.
+        raise_error:
+            If True, raise a ValueError for a missing site or an empty selection; else log a warning.
     Returns:
         ds (xarray dataset):
-            Dataset with mf data of a given model, sliced to only include data for the given site.
+            Dataset sliced to only include data for the given site(s); None if nothing was selected.
     """
+    if not isinstance(sites, list):
+        sites = [sites]
 
-    site_index = get_site_index(ds, site)
+    model = ds.attrs.get("exp_name", "unknown")  # only used in the messages below
+    mask = xr.zeros_like(ds["number_of_identifier"], dtype=bool)
+    for site in sites:
+        site_index = get_site_index(ds, site)
 
-    if site_index is None:
-        raise ValueError(f"Site {site} not found in dataset.")
-
-    mask = ds["number_of_identifier"] == site_index
-    ds = ds.where(mask, drop=True)
-
-    return ds
+        if site_index is not None:
+            mask |= ds["number_of_identifier"] == site_index
+            continue
+        msg = f"Site {site} not found for model {model}."
+        if raise_error:
+            raise ValueError(msg)
+        logger.warning(msg)
+
+    if mask.any():
+        return ds.where(mask, drop=True)
+
+    # Nothing matched; this also covers an empty `sites` list.
+    msg = f"No data for sites {sites} in model {model}."
+    if raise_error:
+        raise ValueError(msg)
+    logger.warning(msg)
+    return None
 
 
 def slice_height(ds: xr.Dataset, intake_height: float) -> xr.Dataset:
@@ -496,3 +513,34 @@ def clean_timeseries_missing_data(
     ds = ds.sortby("time")
 
     return ds
+
+def slice_site_dict_of_datasets(
+    ds_all: dict[str, xr.Dataset],
+    site: str,
+) -> dict[str, xr.Dataset]:
+    """
+    Restrict every dataset of a dictionary to the data of a single site.
+
+    Entries whose "platform" variable does not contain the site are left
+    out of the result, and a warning is logged for each of them.
+
+    Args:
+        ds_all (dictionary of datasets):
+            xarray datasets to slice, one entry per model.
+        site (str):
+            Site of interest.
+    Returns:
+        ds_all_site (dictionary of datasets):
+            Sliced datasets, only for the entries that contain the site.
+    """
+    sliced = {}
+    for model_name, dataset in ds_all.items():
+        logger.info(f"Slicing site {site} from {model_name}.")
+        if site not in dataset["platform"].values:
+            logger.warning(
+                f"Site {site} not found in dataset for {model_name}. "
+                f"Continuing without {model_name} - {site}."
+            )
+            continue
+        sliced[model_name] = slice_site(dataset, site)
+    return sliced
diff --git a/fluxy/operators/stats.py b/fluxy/operators/stats.py
@@ -2,13 +2,14 @@
 import numpy as np
 import pandas as pd
 import xarray as xr
-from fluxy.operators.select import get_unique_sites, get_site_index
+from fluxy.operators.select import get_unique_sites, slice_site
 
 
 def stats_observed_vs_simulated(
     ds_all: dict[str, dict],
     obs_var: str,
     sim_var: str,
+    sites: list = None,
 ) -> pd.DataFrame:
     """
     Calculates multiple statistical measures of the fit between the observed
@@ -29,6 +30,8 @@ def stats_observed_vs_simulated(
             Name of the observed variable.
         sim_var (str):
             Name of the simulated variable.
+        sites (list):
+            Sites for which to make the stats. If None or empty, all sites are used.
 
     Returns:
         stats (pandas.DataFrame):
@@ -62,24 +65,21 @@ def stats_observed_vs_simulated(
 
     # names of sites
     sites_all = get_unique_sites(ds_all)
+    if sites:
+        sites_missing = [site for site in sites if site not in sites_all]
+        sites_all = [site for site in sites if site in sites_all]
+        if sites_missing:
+            logger.warning(f"Sites {sites_missing} are not present in datasets.")
 
     # init empty list to hold results for individual sites
     stats = []
 
     # Compute stats for all sites and all models
     for site in sites_all:
         for model, ds in ds_all.items():
-            site_index = get_site_index(ds, site)
-            if site_index is None:
-                logger.warning(f"Site {site} not found in model {model}.")
+            ds_site = slice_site(ds, site, raise_error=False)
+            if ds_site is None:
                 continue
-            mask_site = ds["number_of_identifier"] == site_index
-            if not mask_site.any():
-                logger.warning(
-                    f"No data for site {site} with index {site_index} in model {model}."
-                )
-                continue
-            ds_site = ds.where(mask_site, drop=True)
 
             # select what to compare
             obs = ds_site[obs_var]