diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 5e7d83b..60b187f 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -34,6 +34,8 @@ Upcoming Release * Add `purge` and `all` rules to clean all generated outputs and regenerate them (https://github.com/PyPSA/technology-data/pull/187) +* Switch to `python-calamine` as engine for reading Excel files in `pandas`, greatly improving performance (https://github.com/PyPSA/technology-data/pull/188) + `v0.11.0 `__ (24th January 2025) ======================================================================================= diff --git a/environment.yaml b/environment.yaml index 2b995d7..ce3817a 100644 --- a/environment.yaml +++ b/environment.yaml @@ -19,6 +19,7 @@ dependencies: - xlrd - scipy - openpyxl>=3.1.2 +- python-calamine - packaging - pytest @@ -26,6 +27,9 @@ dependencies: - pre-commit - ruff +# Interactive execution +- ipykernel + - pip: - tabula-py - currencyconverter diff --git a/scripts/compile_cost_assumptions.py b/scripts/compile_cost_assumptions.py index f9f8de7..07929e4 100644 --- a/scripts/compile_cost_assumptions.py +++ b/scripts/compile_cost_assumptions.py @@ -376,6 +376,7 @@ def get_dea_maritime_data( index_col=[0, 1], usecols="A:F", na_values="N/A", + engine="calamine", ) wished_index = [ @@ -500,6 +501,7 @@ def get_dea_vehicle_data( index_col=0, usecols="A:F", na_values="no data", + engine="calamine", ) wished_index = [ @@ -667,6 +669,7 @@ def get_data_DEA( usecols=usecols, skiprows=skiprows, na_values="N.A", + engine="calamine", ) excel.dropna(axis=1, how="all", inplace=True) @@ -2439,6 +2442,7 @@ def add_gas_storage( gas_storage_file_name, sheet_name="150 Underground Storage of Gas", index_col=1, + engine="calamine", ) gas_storage.dropna(axis=1, how="all", inplace=True) @@ -3666,6 +3670,7 @@ def add_energy_storage_database( "ref_size_MW": float, "EP_ratio_h": float, }, + engine="calamine", ) df = df.drop(columns=["ref_size_MW", "EP_ratio_h"]) df = df.fillna(df.dtypes.replace({"float64": 0.0, "O": "NULL"})) @@ -3939,7 +3944,9 @@ def prepare_inflation_rate(fn: str) -> pd.Series: inflation rates series """ - inflation_rate = pd.read_excel(fn, sheet_name="Sheet 1", index_col=0, header=[8]) + inflation_rate = pd.read_excel( + fn, sheet_name="Sheet 1", index_col=0, header=[8], engine="calamine" + ) inflation_rate = ( inflation_rate.loc["European Union - 27 countries (from 2020)"].dropna() ).loc["2001"::]