diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index d7f64d20..a4e8409a 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -26,7 +26,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pytest jupyter nbconvert + pip install black + - name: Check code formatting with black + run: | + black --check ./tests ./fluxie + - name: Install the package + run: | + pip install pytest jupyter nbconvert pip install -e . - name: Test with pytest run: | diff --git a/fluxie/cli/annex_plot_generator.ipynb b/fluxie/cli/annex_plot_generator.ipynb index 02058a4f..02f5c86c 100644 --- a/fluxie/cli/annex_plot_generator.ipynb +++ b/fluxie/cli/annex_plot_generator.ipynb @@ -20,21 +20,22 @@ "%load_ext autoreload\n", "%autoreload 2\n", "import logging\n", + "\n", "logging.basicConfig(level=logging.WARNING)\n", "\n", "import annex_plot_generator as pg\n", "\n", "### Define region of interest\n", - "regions = 'UK' # Focus countries: UK, SWITZERLAND, GERMANY, ITALY, NETHERLANDS, IRELAND, HUNGARY, NORWAY\n", + "regions = \"UK\" # Focus countries: UK, SWITZERLAND, GERMANY, ITALY, NETHERLANDS, IRELAND, HUNGARY, NORWAY\n", "\n", "### Choose inventory year\n", - "inventory_years = ['2024','2025']\n", + "inventory_years = [\"2024\", \"2025\"]\n", "\n", "### Set output path\n", - "output_path = '/path/to/save/figures/'\n", + "output_path = \"/path/to/save/figures/\"\n", "\n", "### Produce plots\n", - "pg.produce_plots(regions, output_path, inventory_years)\n" + "pg.produce_plots(regions, output_path, inventory_years)" ] } ], diff --git a/fluxie/cli/annex_plot_generator.py b/fluxie/cli/annex_plot_generator.py index 4b36cac6..6af2267c 100644 --- a/fluxie/cli/annex_plot_generator.py +++ b/fluxie/cli/annex_plot_generator.py @@ -274,16 +274,21 @@ def produce_plots( annual_res = pd.concat(annual_res_list, ignore_index=True) hfcs_list = [s for s in annual_res.species.unique() if s[:3].lower() == "hfc"] - pfcs_list = [s for s in annual_res.species.unique() if s[:3].lower() in ["pfc","cf4"]] + pfcs_list = [ + s for s in annual_res.species.unique() if s[:3].lower() in ["pfc", "cf4"] + ] main_gases_list = ["ch4", "n2o", "sf6", "nf3", "all_pfc", "all_hfc"] - for name, species_list in zip(["hfc", "pfc", "main_gases"], - [hfcs_list, pfcs_list, main_gases_list]): + for name, species_list in zip( + ["hfc", "pfc", "main_gases"], [hfcs_list, pfcs_list, main_gases_list] + ): print(f"\nTABLE {name.upper().replace('_',' ')}") - sp_res = create_str_dataframe(annual_res, - inventory_years, - species_list, - table_start_date=annex_config_data.start_date_table) + sp_res = create_str_dataframe( + annual_res, + inventory_years, + species_list, + table_start_date=annex_config_data.start_date_table, + ) make_table(sp_res, output_path / f"{name}_res_{region}.tex", inventory_years) sp_res.to_csv(output_path / f"{name}_res_{region}.csv", index=False) diff --git a/fluxie/cli/config_annex_plot.py b/fluxie/cli/config_annex_plot.py index c5b2bc56..ac4412ff 100644 --- a/fluxie/cli/config_annex_plot.py +++ b/fluxie/cli/config_annex_plot.py @@ -35,53 +35,54 @@ # Lat/Lon limits in spatial maps map_limits = { - "UK": [-12,6,47,65], - "SWITZERLAND": [4,12,44.5,50], - "GERMANY": [2.5,15.5,46,57], #NOTE: equal to automatic GERMANY country mask, but needed for proper ITMS comparison - "ITALY": [5,20,35,49], + "UK": [-12, 6, 47, 65], + "SWITZERLAND": [4, 12, 44.5, 50], + # NOTE: equal to automatic GERMANY country mask, but needed for proper ITMS comparison + "GERMANY": [2.5, 15.5, 46, 57], + "ITALY": [5, 20, 35, 49], "NETHERLANDS": "BENELUX", "BELGIUM": "BENELUX", "BENELUX": "BENELUX", - "IRELAND": [-12,-2,50,58], - "HUNGARY": [14,25,44.5,50.5], - "NORWAY": [3,33,55,79], + "IRELAND": [-12, -2, 50, 58], + "HUNGARY": [14, 25, 44.5, 50.5], + "NORWAY": [3, 33, 55, 79], } # Limits in posterior spatial map color scale (in units flux_units_print) # default = "auto" fluxlim = { "UK": { - "pfc116": [0,0.16], - "pfc318": [0,0.1], - "nf3": [0,0.04], + "pfc116": [0, 0.16], + "pfc318": [0, 0.1], + "nf3": [0, 0.04], }, "IRELAND": { - "cf4": [0,0.5], - "pfc218": [0,0.4], - "pfc318": [0,0.1], - "sf6": [0,0.4], - "nf3": [0,0.04], + "cf4": [0, 0.5], + "pfc218": [0, 0.4], + "pfc318": [0, 0.1], + "sf6": [0, 0.4], + "nf3": [0, 0.04], }, "NETHERLANDS": { - "ch4": [0,40000], - "cf4": [0,1], - "pfc218": [0,0.2], - "sf6": [0,1], + "ch4": [0, 40000], + "cf4": [0, 1], + "pfc218": [0, 0.2], + "sf6": [0, 1], }, "BELGIUM": { - "ch4": [0,40000], - "cf4": [0,1], - "pfc218": [0,0.2], - "sf6": [0,1], + "ch4": [0, 40000], + "cf4": [0, 1], + "pfc218": [0, 0.2], + "sf6": [0, 1], }, "BENELUX": { - "ch4": [0,40000], - "cf4": [0,1], - "pfc218": [0,0.2], - "sf6": [0,1], + "ch4": [0, 40000], + "cf4": [0, 1], + "pfc218": [0, 0.2], + "sf6": [0, 1], }, "ITALY": { - "hfc23": [0,1], + "hfc23": [0, 1], }, } @@ -112,7 +113,7 @@ "hfc23": 0.95, "hfc236fa": 0.98, "hfc245fa": 0.95, - "hfc365mfc": 0.985, + "hfc365mfc": 0.985, "cf4": 0.97, "sf6": 0.96, }, @@ -205,7 +206,7 @@ "hfc236fa": 0.985, "hfc245fa": 0.96, "hfc365mfc": 0.96, - "hfc4310mee": 0.98, + "hfc4310mee": 0.98, "pfc116": 0.9999, }, "HUNGARY": { @@ -215,7 +216,7 @@ "hfc134a": 0.95, "hfc143a": 0.999, "hfc227ea": 0.97, - "hfc23":0.98, + "hfc23": 0.98, "cf4": 0.98, "pfc116": 0.995, "pfc218": 1, diff --git a/fluxie/cli/utils_annex_plot.py b/fluxie/cli/utils_annex_plot.py index e0cd9ab9..8a4bfd9c 100644 --- a/fluxie/cli/utils_annex_plot.py +++ b/fluxie/cli/utils_annex_plot.py @@ -1,6 +1,7 @@ import pandas as pd import numpy as np + def get_species_specific_settings( species: str, period: str, settings: list | dict ) -> list | dict: @@ -31,6 +32,7 @@ def get_species_specific_settings( return settings_species + def create_str_dataframe( res: dict, inventory_year: str | int, @@ -41,7 +43,7 @@ def create_str_dataframe( table_start_date: str | None = None, ) -> pd.DataFrame: """ - Create a dataframe with results for a specific country, region and model. The columns are "species", "units", "source" (which values are "" and "NID ") and the years present. + Create a dataframe with results for a specific country, region and model. The columns are "species", "units", "source" (which values are "" and "NID ") and the years present. The values are string of the form " \\pm ", made to be usable directly to make the tex output for the annexes tables. Set the units and right number of digits. NOTE: Assume that the units in res is Tg CO2-eq yr-1 Args: @@ -56,107 +58,146 @@ def create_str_dataframe( Return: output: pandas Dataframe contianing the string values that will be put in the .tex files for the annexes tables. """ - + if not table_start_date: - table_start_date = np.datetime64("1900-01-01") + table_start_date = np.datetime64("1900-01-01") elif isinstance(table_start_date, str): - table_start_date = np.datetime64(table_start_date) - + table_start_date = np.datetime64(table_start_date) + if not region: - if res.country.unique().size!=1: + if res.country.unique().size != 1: raise ValueError( f"`region` parameter should be provided when there is more than one region in `res` (currently present: {res.country.unique()})." ) region = res.country.unique()[0] if not sector: - if res.sector.unique().size!=1: + if res.sector.unique().size != 1: raise ValueError( f"`sector` parameter should be provided when there is more than one region in `res` (currently present: {res.sector.unique()})." ) sector = res.sector.unique()[0] - if not isinstance(species,list): - species = [species,] + if not isinstance(species, list): + species = [ + species, + ] res["time"] = pd.to_datetime(res["time"]) - data = res[(res.country==region) - &(res.sector==sector) - &(res.model.isin([model,f"inventory_{inventory_year}"])) - &res.species.isin(species) - &(res.type.isin(["posterior","inventory"])) - &(res.time>=table_start_date) - ].reset_index(drop=True) - + data = res[ + (res.country == region) + & (res.sector == sector) + & (res.model.isin([model, f"inventory_{inventory_year}"])) + & res.species.isin(species) + & (res.type.isin(["posterior", "inventory"])) + & (res.time >= table_start_date) + ].reset_index(drop=True) + data["year"] = pd.to_datetime(data["time"]).dt.year.astype(str) - species_order = data[data.model==model].groupby("species").mean_val.mean().sort_values(ascending=False).index + species_order = ( + data[data.model == model] + .groupby("species") + .mean_val.mean() + .sort_values(ascending=False) + .index + ) rescaled_data = list() for species in data.species.unique(): - data_per_species = data[data.species==species].copy() - - _, exp = np.stack(data_per_species.mean_val.apply(lambda x: np.array(f"{x:.2e}".split("e")).astype(float) - if f"{x:.2e}"!="0.00e+00" else [np.nan,np.nan]).values).T + data_per_species = data[data.species == species].copy() + + _, exp = np.stack( + data_per_species.mean_val.apply( + lambda x: ( + np.array(f"{x:.2e}".split("e")).astype(float) + if f"{x:.2e}" != "0.00e+00" + else [np.nan, np.nan] + ) + ).values + ).T max_exp = np.nanmax(exp) - if max_exp<-1 and max_exp>=-4: - for var in ["mean_val","min_unc","max_unc"]: + if max_exp < -1 and max_exp >= -4: + for var in ["mean_val", "min_unc", "max_unc"]: data_per_species[var] *= 1e3 - data_per_species["units"] = "\\footnotesize{$\\left (\\rm{GgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}" + data_per_species["units"] = ( + "\\footnotesize{$\\left (\\rm{GgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}" + ) max_exp += 3 - elif max_exp<-4: - for var in ["mean_val","min_unc","max_unc"]: + elif max_exp < -4: + for var in ["mean_val", "min_unc", "max_unc"]: data_per_species[var] *= 1e6 - data_per_species["units"] = "\\footnotesize{$\\left (\\rm{MgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}" + data_per_species["units"] = ( + "\\footnotesize{$\\left (\\rm{MgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}" + ) max_exp += 6 else: - data_per_species["units"] = "\\footnotesize{$\\left (\\rm{TgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}" - + data_per_species["units"] = ( + "\\footnotesize{$\\left (\\rm{TgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}" + ) + n_figure = 3 if species in ["ch4", "n2o"] else 2 n_digits = int(n_figure - max_exp - 1) - data_per_species["mean_val"] = data_per_species.mean_val.apply(lambda x: f"{x:.{n_digits}f}") - data_per_species["unc"] = data_per_species.apply(lambda x: f"{(x.max_unc-x.min_unc)/2:.{n_digits}f}" if x.type!="inventory" else "", axis=1) + data_per_species["mean_val"] = data_per_species.mean_val.apply( + lambda x: f"{x:.{n_digits}f}" + ) + data_per_species["unc"] = data_per_species.apply( + lambda x: ( + f"{(x.max_unc-x.min_unc)/2:.{n_digits}f}" + if x.type != "inventory" + else "" + ), + axis=1, + ) data_per_species = pd.concat([data_per_species]) rescaled_data.append(data_per_species) data = pd.concat(rescaled_data) - data["val"] = data.apply(lambda x : x.mean_val if x.type=="inventory" - else f"{x.mean_val} \\pm {x.unc}", - axis=1) + data["val"] = data.apply( + lambda x: x.mean_val if x.type == "inventory" else f"{x.mean_val} \\pm {x.unc}", + axis=1, + ) - output = data.pivot(index=["model","species","units"],columns="year",values = "val").reset_index() + output = data.pivot( + index=["model", "species", "units"], columns="year", values="val" + ).reset_index() output.columns.name = None - output.fillna(" ",inplace=True) + output.fillna(" ", inplace=True) - output.rename(columns={"model":"source"},inplace=True) - output["source"] = output["source"].apply(lambda x: x.replace("inventory_","NID ")) + output.rename(columns={"model": "source"}, inplace=True) + output["source"] = output["source"].apply(lambda x: x.replace("inventory_", "NID ")) - output["sort_col1"] = output.species.apply(lambda x : species_order.get_loc(x)) - output["sort_col2"] = output.source.apply(lambda x : 1 if x==model else 0) - output.sort_values(by=["sort_col1","sort_col2"], inplace=True, ignore_index=True) + output["sort_col1"] = output.species.apply(lambda x: species_order.get_loc(x)) + output["sort_col2"] = output.source.apply(lambda x: 1 if x == model else 0) + output.sort_values(by=["sort_col1", "sort_col2"], inplace=True, ignore_index=True) del output["sort_col1"], output["sort_col2"] - species_name = {"ch4":"CH$_4$", "n2o":"N$_2$O", "sf6": "SF$_6$", "nf3": "NF$_3$", "cf4": "PFC-14", "all_pfc": "Total PFC", "all_hfc": "Total HFC"} + species_name = { + "ch4": "CH$_4$", + "n2o": "N$_2$O", + "sf6": "SF$_6$", + "nf3": "NF$_3$", + "cf4": "PFC-14", + "all_pfc": "Total PFC", + "all_hfc": "Total HFC", + } for species in output.species.unique(): if species not in species_name.keys(): - species_name[species] = species.replace("hfc","HFC-").replace("pfc","PFC-") + species_name[species] = species.replace("hfc", "HFC-").replace( + "pfc", "PFC-" + ) output.replace(species_name, inplace=True) index_col = ["species", "units", "source"] columns = np.concatenate( [ index_col, - np.sort( - [ - col - for col in output.columns - if col not in index_col - ]), + np.sort([col for col in output.columns if col not in index_col]), ] ) output = output[columns] - + return output @@ -187,13 +228,13 @@ def make_table( + label + caption + "\n \\begin{center}\n \\begin{tabular}{ " - + (len(descriptive_cols)-1) * "l " + + (len(descriptive_cols) - 1) * "l " + (len(df.columns) - len(descriptive_cols)) * "l " + "}" ) # Set first line with columns title - header = " " + (len(descriptive_cols)-1) * " & " + header = " " + (len(descriptive_cols) - 1) * " & " for y in df.columns[len(descriptive_cols) :]: header += y if y != df.columns[-1]: @@ -229,7 +270,7 @@ def make_table( # Add hline if needed for key in hline_place.keys(): - if k == nrows-1: + if k == nrows - 1: l += " \\bottomrule " elif row[key] == hline_place[key]: l += " \\midrule " diff --git a/fluxie/operators/convert.py b/fluxie/operators/convert.py index b6f85f8c..1557f23b 100644 --- a/fluxie/operators/convert.py +++ b/fluxie/operators/convert.py @@ -298,10 +298,7 @@ def get_units_conversion_factor( return unit_to_base / target_to_base * M_scaling -def convert_units_co2eq( - from_unit: str, to_unit: str, species_info: dict -) -> float: - +def convert_units_co2eq(from_unit: str, to_unit: str, species_info: dict) -> float: """ Convert between units that may include 'CO2-eq'. Wraps get_units_conversion_factor and applies species GWP when needed. @@ -318,8 +315,7 @@ def strip_co2eq(unit): # Base physical conversion conversion_factor = get_units_conversion_factor( - from_unit=from_base, - to_unit=to_base + from_unit=from_base, to_unit=to_base ) # Get GWP if needed diff --git a/fluxie/operators/flux_map_resample.py b/fluxie/operators/flux_map_resample.py index de102eb9..4b5a4102 100644 --- a/fluxie/operators/flux_map_resample.py +++ b/fluxie/operators/flux_map_resample.py @@ -263,7 +263,7 @@ def resample_over_dates_list( ds, groups_da, resample_uncert_correlation ) ds_resampled = ds_resampled.rename({"group": "time"}) - + # Create labels based on the first and last date in each group time_labels = [] for group in np.unique(groups_da): @@ -276,6 +276,7 @@ def resample_over_dates_list( return ds_resampled, time_labels + def resample_over_periods_list( ds: xr.Dataset, start_dates_list: List[str], @@ -304,7 +305,9 @@ def resample_over_periods_list( # --- basic consistency check --- if len(start_dates_list) != len(end_dates_list): - raise ValueError("'start_dates_list' and 'end_dates_list' must have the same length.") + raise ValueError( + "'start_dates_list' and 'end_dates_list' must have the same length." + ) n_periods = len(start_dates_list) @@ -781,7 +784,7 @@ def resample_over_period( N: int = 1, chop_by: ( Literal["year", "month", "season"] - | List + | List | Tuple[List, List] | Literal["DJF", "MAM", "JJA", "SON"] | None @@ -805,7 +808,7 @@ def resample_over_period( Interval length for custom periods (e.g., for months or years). chop_by (str, list): Defines how the dataset should be chopped. - Options are: 'year', 'month', 'season', None, a list of dates or months, + Options are: 'year', 'month', 'season', None, a list of dates or months, a season, or a tuple of lists of dates. resample_uncert_correlation (bool): If True, uncertainties are averaged directly over groups. @@ -826,13 +829,17 @@ def resample_over_period( start_dates_list, end_dates_list = chop_by if ( - isinstance(start_dates_list, list) + isinstance(start_dates_list, list) and isinstance(end_dates_list, list) and len(start_dates_list) == len(end_dates_list) - and all(isinstance(s, (str, datetime.date, np.datetime64, pd.Timestamp)) - for s in start_dates_list) - and all(isinstance(e, (str, datetime.date, np.datetime64, pd.Timestamp)) - for e in end_dates_list) + and all( + isinstance(s, (str, datetime.date, np.datetime64, pd.Timestamp)) + for s in start_dates_list + ) + and all( + isinstance(e, (str, datetime.date, np.datetime64, pd.Timestamp)) + for e in end_dates_list + ) ): # Directly call the new date-range resampling return resample_over_periods_list( @@ -847,7 +854,7 @@ def resample_over_period( "with equal-length lists of valid dates." ) - # ------------------------------------------------------------------ + # ------------------------------------------------------------------ if isinstance(chop_by, list): # Case where chop_by is a list of dates diff --git a/fluxie/operators/flux_scale_by_sector_proportions.py b/fluxie/operators/flux_scale_by_sector_proportions.py index c842c737..74ecc3c9 100644 --- a/fluxie/operators/flux_scale_by_sector_proportions.py +++ b/fluxie/operators/flux_scale_by_sector_proportions.py @@ -150,9 +150,7 @@ def scale_by_sector_proportions( data_dir, "sector_flux", f"{sector_file}_{species}_yearly_flux_sectors.nc" ) - logger.info( - f"Using {sector_prop_path} to scale total fluxes into sector fluxes." - ) + logger.info(f"Using {sector_prop_path} to scale total fluxes into sector fluxes.") ds_all_out = {} scaling_factor_all = {} @@ -165,7 +163,7 @@ def scale_by_sector_proportions( sectors = [v.split("_")[-1] for v in ds_sectors if "total" not in v] logger.warning( "No sectors specified, so reading sector list from sector_flux file." - +f" Used sectors: {sectors}" + + f" Used sectors: {sectors}" ) # Convert prior and posterior flux for each sector diff --git a/fluxie/operators/stats.py b/fluxie/operators/stats.py index c2f064b0..3c516fb0 100644 --- a/fluxie/operators/stats.py +++ b/fluxie/operators/stats.py @@ -30,7 +30,7 @@ def stats_observed_vs_simulated( Name of the observed variable. sim_var (str): Name of the simulated variable. - sites (list): + sites (list): Sites for which to make the stats. Returns: @@ -66,10 +66,10 @@ def stats_observed_vs_simulated( # names of sites sites_all = get_unique_sites(ds_all) if sites: - sites_all = [site for site in sites if site in sites_all] - sites_missing = [site for site in sites if site not in sites_all] - if sites_missing: - logger.warning(f"Sites {sites_missing} are not present in datasets.") + sites_all = [site for site in sites if site in sites_all] + sites_missing = [site for site in sites if site not in sites_all] + if sites_missing: + logger.warning(f"Sites {sites_missing} are not present in datasets.") # init empty list to hold results for individual sites stats = [] @@ -77,7 +77,7 @@ def stats_observed_vs_simulated( # Compute stats for all sites and all models for site in sites_all: for model, ds in ds_all.items(): - ds_site = slice_site(ds, site, raise_error=False) + ds_site = slice_site(ds, site, raise_error=False) if ds_site is None: continue diff --git a/fluxie/plots/ec_flux/sectorial_stack.py b/fluxie/plots/ec_flux/sectorial_stack.py index cded2494..95f1f084 100644 --- a/fluxie/plots/ec_flux/sectorial_stack.py +++ b/fluxie/plots/ec_flux/sectorial_stack.py @@ -210,7 +210,7 @@ def plot_stacked( errorbar_kwargs = errorbar_kwargs.copy() yerr = df_obs["std"].values.reshape(-1) # Replace NaN with 0.0 - yerr = np.where(np.isnan(yerr), 0.0, yerr) + yerr = np.where(np.isnan(yerr), 0.0, yerr) errorbar_kwargs["yerr"] = np.array(yerr) ax.errorbar( df_obs.index, @@ -271,7 +271,9 @@ def plot_stacked( "%m_%H": "Month and hour of the day", "%H_%M": "Hour and minute of the day", } - x_label = "Wind direction" if wind_plot else x_labels.get(group_format, group_format) + x_label = ( + "Wind direction" if wind_plot else x_labels.get(group_format, group_format) + ) y_label = f"{species} Flux " " [ µmol m$^{-2}$ s$^{-1}$ ]" if not wind_plot: ax.set_ylabel(y_label) diff --git a/fluxie/plots/mf_timeseries.py b/fluxie/plots/mf_timeseries.py index b91a6d70..b61971a2 100644 --- a/fluxie/plots/mf_timeseries.py +++ b/fluxie/plots/mf_timeseries.py @@ -543,7 +543,7 @@ def add_unc_plot( if plot_type is None: return - + min_unc = np.array(min_unc, dtype=float) max_unc = np.array(max_unc, dtype=float) diff --git a/fluxie/types.py b/fluxie/types.py index a2023fbf..608b3bc9 100644 --- a/fluxie/types.py +++ b/fluxie/types.py @@ -14,21 +14,18 @@ class DataTypes(Enum): EDDY_FLUX = "eddy_flux" -def file_pattern( - file_type: DataTypes, - alternative: bool = False -) -> str: +def file_pattern(file_type: DataTypes, alternative: bool = False) -> str: """ Returns the ending pattern for the given file type. Args: file_type (DataTypes): Type of file (flux or concentration as defined in class DataTypes) - alternative (bool): - If true an alternative file ending is used for flux files. + alternative (bool): + If true an alternative file ending is used for flux files. Returns: ds_all (str): - filename ending depending on data type. + filename ending depending on data type. """ if file_type == DataTypes.FLUX: diff --git a/tests/ecflux/test_ecflux_operators.py b/tests/ecflux/test_ecflux_operators.py index 4b5af21f..ebb2d5ca 100644 --- a/tests/ecflux/test_ecflux_operators.py +++ b/tests/ecflux/test_ecflux_operators.py @@ -4,7 +4,6 @@ from fluxie.plots.ec_flux.sectorial_stack import plot_stacked from fluxie.test_utils import data_dir - test_models = ["EDDY_HARDAU", "EDDY_HARDAU_STORAGE_2LAYERS"] diff --git a/tests/test_annex.py b/tests/test_annex.py index b466bc94..4aadc557 100644 --- a/tests/test_annex.py +++ b/tests/test_annex.py @@ -12,66 +12,85 @@ def make_test_data(): inv_year = 2000 model = "CAT" - inv_data = pd.DataFrame({"type":["inventory",]*time.size, - "model":[f"inventory_{inv_year}",]*time.size, - "sector":[sector,]*time.size, - "country":[country,]*time.size, - "species":[species,]*time.size, - "time": time.astype("datetime64[ns]"), - "mean_val": np.array([0.0, 0.1]),}) - - prior_data = pd.DataFrame({"type":["prior",]*time.size, - "model":[model,]*time.size, - "sector":[sector,]*time.size, - "country":[country,]*time.size, - "species":[species,]*time.size, - "time": time.astype("datetime64[ns]"), - "mean_val": np.array([0.0, 0.5]),}) - - post_data = pd.DataFrame({"type":["posterior",]*time.size, - "model":[model,]*time.size, - "sector":[sector,]*time.size, - "country":[country,]*time.size, - "species":[species,]*time.size, - "time": time.astype("datetime64[ns]"), - "mean_val": np.array([1.0, 1.0]), - "min_unc": np.array([0.0, 0.0]), - "max_unc": np.array([2.0, 2.0]),}) - + inv_data = pd.DataFrame( + { + "type": ["inventory"] * time.size, + "model": [f"inventory_{inv_year}"] * time.size, + "sector": [sector] * time.size, + "country": [country] * time.size, + "species": [species] * time.size, + "time": time.astype("datetime64[ns]"), + "mean_val": np.array([0.0, 0.1]), + } + ) + + prior_data = pd.DataFrame( + { + "type": ["prior"] * time.size, + "model": [model] * time.size, + "sector": [sector] * time.size, + "country": [country] * time.size, + "species": [species] * time.size, + "time": time.astype("datetime64[ns]"), + "mean_val": np.array([0.0, 0.5]), + } + ) + + post_data = pd.DataFrame( + { + "type": ["posterior"] * time.size, + "model": [model] * time.size, + "sector": [sector] * time.size, + "country": [country] * time.size, + "species": [species] * time.size, + "time": time.astype("datetime64[ns]"), + "mean_val": np.array([1.0, 1.0]), + "min_unc": np.array([0.0, 0.0]), + "max_unc": np.array([2.0, 2.0]), + } + ) + res = pd.concat([inv_data, prior_data, post_data], ignore_index=True) return res + def test_dict_to_str_dataframe(): data = make_test_data() expected = pd.DataFrame( { - "species": ["CH$_4$",]*2, - "units": ["\\footnotesize{$\\left (\\rm{TgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}",]*2, + "species": ["CH$_4$"] * 2, + "units": [ + "\\footnotesize{$\\left (\\rm{TgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}" + ] + * 2, "source": ["NID 2000", "CAT"], "1900": ["0.00", "1.00 \\pm 1.00"], "2000": ["0.10", "1.00 \\pm 1.00"], } ) - output = create_str_dataframe(data,"2000","ch4",sector="flower",model="CAT") - - assert (output==expected).values.all() + output = create_str_dataframe(data, "2000", "ch4", sector="flower", model="CAT") + + assert (output == expected).values.all() for var in ["mean_val", "min_unc", "max_unc"]: data[var] *= 1e-2 - + expected = pd.DataFrame( { - "species": ["CH$_4$",]*2, - "units": ["\\footnotesize{$\\left (\\rm{GgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}",]*2, + "species": ["CH$_4$"] * 2, + "units": [ + "\\footnotesize{$\\left (\\rm{GgCO}_{2}\\rm{\\text{-}eq} \\cdot \\rm{yr}^{-1} \\right )$}" + ] + * 2, "source": ["NID 2000", "CAT"], "1900": ["0.0", "10.0 \\pm 10.0"], "2000": ["1.0", "10.0 \\pm 10.0"], } ) - - output = create_str_dataframe(data,"2000","ch4",sector="flower",model="CAT") - - assert (output==expected).values.all() \ No newline at end of file + + output = create_str_dataframe(data, "2000", "ch4", sector="flower", model="CAT") + + assert (output == expected).values.all() diff --git a/tests/test_flux_map_resample.py b/tests/test_flux_map_resample.py index 84dea7c7..96922990 100644 --- a/tests/test_flux_map_resample.py +++ b/tests/test_flux_map_resample.py @@ -5,7 +5,7 @@ resample_over_period, ) -dss = get_loaded_models(test_models,"flux") +dss = get_loaded_models(test_models, "flux") @pytest.mark.parametrize("model", test_models) diff --git a/tests/test_flux_scale_by_sector_proportions.py b/tests/test_flux_scale_by_sector_proportions.py index 202f7981..5f824999 100644 --- a/tests/test_flux_scale_by_sector_proportions.py +++ b/tests/test_flux_scale_by_sector_proportions.py @@ -1,7 +1,9 @@ import pytest from pathlib import Path import fluxie -from fluxie.operators.flux_scale_by_sector_proportions import scale_by_sector_proportions +from fluxie.operators.flux_scale_by_sector_proportions import ( + scale_by_sector_proportions, +) from fluxie.io import read_config_files, read_model_output from fluxie.operators.select import slice_flux @@ -27,7 +29,7 @@ def test_scale_by_sector_proportions(): create_region_sector_totals = True # if True, uses country_fraction variable to sum spatial sector fluxes to region sector fluxes ds_all_flux_scaled = {} - + ds_all_flux = read_model_output( data_dir, "flux", species, models, config_data, period=period ) @@ -52,7 +54,7 @@ def test_scale_by_sector_proportions(): sector_file=sector_file, create_region_sector_totals=create_region_sector_totals, sectors=["agriculture"], - cell_area_test_file=True + cell_area_test_file=True, ) # variables flux_agriculture_prior, flux_agriculture_posterior, diff --git a/tests/test_io.py b/tests/test_io.py index 5f4357d3..1630ef82 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -19,7 +19,13 @@ def test_read_flux(model, add_sites_to_flux): period = "yearly" ds_all_flux = read_model_output( - data_dir, "flux", species, [model], config_data, period=period, add_sites_to_flux=add_sites_to_flux + data_dir, + "flux", + species, + [model], + config_data, + period=period, + add_sites_to_flux=add_sites_to_flux, ) assert model in ds_all_flux, f"Model {model} not found in the loaded flux datasets" @@ -38,7 +44,9 @@ def test_read_mf(model): data_dir, "concentration", species, [model], config_data, period=period ) - assert model in ds_all_mf, f"Model {model} not found in the loaded mole fraction datasets" + assert ( + model in ds_all_mf + ), f"Model {model} not found in the loaded mole fraction datasets" def test_read_config_default(): diff --git a/tests/test_plots.py b/tests/test_plots.py index a49d7334..a31afae7 100644 --- a/tests/test_plots.py +++ b/tests/test_plots.py @@ -23,7 +23,9 @@ from fluxie.operators.mf import compute_mf_difference from fluxie.plots.mf_stats import plot_stats_mf, plot_taylor_diagram from fluxie.test_utils import data_dir -from fluxie.operators.flux_scale_by_sector_proportions import scale_by_sector_proportions +from fluxie.operators.flux_scale_by_sector_proportions import ( + scale_by_sector_proportions, +) config_data = read_config_files() annotate_coords = set_print_settings() diff --git a/tests/utils/test_format_region.py b/tests/utils/test_format_region.py index 1c0a4db3..4d0356e1 100644 --- a/tests/utils/test_format_region.py +++ b/tests/utils/test_format_region.py @@ -16,9 +16,12 @@ def test_format_default(): def test_no_region(): """Test that format_plot_regions raises ValueError when called without plot_regions or ds_all.""" - with pytest.raises(ValueError, match="ds_all must be provided if plot_regions is None."): + with pytest.raises( + ValueError, match="ds_all must be provided if plot_regions is None." + ): format_plot_regions() + def test_format_from_ds_all(): ds_mock = { "model1": xr.Dataset( @@ -36,7 +39,7 @@ def test_format_from_ds_all(): formatted_regions = format_plot_regions(ds_all=ds_mock) assert isinstance(formatted_regions, list) - for c in ['FRA', 'DEU']: + for c in ["FRA", "DEU"]: assert c in formatted_regions - for c in ['ITA', 'ESP']: + for c in ["ITA", "ESP"]: assert c not in formatted_regions