Skip to content

Commit 80ac9d5

Browse files
authored
provide aerosol data via pooch (#267)
* provide aerosol data via pooch * fix hash * fix correct docstring * changelog
1 parent de55522 commit 80ac9d5

File tree

8 files changed

+61
-2071
lines changed

8 files changed

+61
-2071
lines changed

CHANGELOG.rst

+5
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ New Features
6262
<https://github.com/MESMER-group/mesmer/pull/220>`_). By `Mathias Hauser
6363
<https://github.com/mathause>`_.
6464

65+
- The aerosol data is now automatically downloaded using `pooch <https://www.fatiando.org/pooch/latest/>`__.
66+
(`#267 <https://github.com/MESMER-group/mesmer/pull/267>`_). By `Mathias Hauser
67+
<https://github.com/mathause>`_.
68+
69+
6570
Breaking changes
6671
^^^^^^^^^^^^^^^^
6772

data/README.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# mesmer data
2+
3+
This folder contains auxiliary data files for mesmer. The files are downloaded on demand using [pooch](https://www.fatiando.org/pooch/latest/).

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ dependencies:
1212
- numpy
1313
- packaging
1414
- pandas<2.0
15+
- pooch
1516
- regionmask>=0.8
1617
- scikit-learn
1718
- sphinx

mesmer/calibrate_mesmer/train_gt.py

+11-7
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
Functions to train global trend module of MESMER.
77
"""
88

9+
import warnings
910

1011
import numpy as np
1112
import xarray as xr
@@ -126,7 +127,7 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):
126127
var_all = np.unique(var_all, axis=0)
127128

128129
params_gt["saod"], params_gt["hist"] = train_gt_ic_OLSVOLC(
129-
var_all, gt_lowess_hist, params_gt["time"]["hist"], cfg
130+
var_all, gt_lowess_hist, params_gt["time"]["hist"]
130131
)
131132
elif params_gt["method"] == "LOWESS":
132133
params_gt["hist"] = gt_lowess_hist
@@ -201,7 +202,7 @@ def train_gt_ic_LOWESS(data):
201202
return gt_lowess, frac_lowess_name
202203

203204

204-
def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
205+
def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg=None):
205206
"""
206207
Derive global trend (emissions + volcanoes) parameters from single ESM ic ensemble
207208
by adding volcanic spikes to LOWESS trend.
@@ -214,8 +215,8 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
214215
1d array of smooth global trend of variable
215216
time : np.ndarray
216217
1d array of years
217-
cfg : module
218-
config file containing metadata needed to load in stratospheric AOD time series
218+
cfg : None
219+
Passing cfg is no longer required.
219220
220221
Returns
221222
-------
@@ -231,14 +232,17 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
231232
232233
"""
233234

234-
# specify necessary variables from cfg file
235-
dir_obs = cfg.dir_obs
235+
if cfg is not None:
236+
warnings.warn(
237+
"Passing ``cfg`` to ``train_gt_ic_OLSVOLC`` is no longer necessary",
238+
FutureWarning,
239+
)
236240

237241
nr_runs, nr_ts = var.shape
238242

239243
# account for volcanic eruptions in historical time period
240244
# load in observed stratospheric aerosol optical depth
241-
aod_obs = load_strat_aod(time, dir_obs)
245+
aod_obs = load_strat_aod(time)
242246
# drop "year" coords - aod_obs does not have coords (currently)
243247
aod_obs = aod_obs.drop_vars("year")
244248

mesmer/core/_data.py

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import pooch
2+
3+
import mesmer
4+
5+
6+
def fetch_remote_data(name):
7+
"""
8+
Fetch an auxiliary mesmer data file through pooch.

Builds a pooch registry that points at the ``data/`` folder of the mesmer
GitHub repository, caches files in the ``"mesmer"`` OS cache directory and
returns the result of ``fetch(name)``.

Parameters
----------
name : str
    Name of the data file to fetch, e.g. ``"isaod_gl_2022.dat"`` (currently
    the only entry in the registry; presumably ``name`` must be one of the
    registry keys - verify against the pooch documentation).

Returns
-------
The value returned by ``fetch`` of the pooch object - presumably the path of
the locally cached file (TODO confirm against the pooch documentation).
9+
"""
10+
11+
# cache location returned by pooch for the application name "mesmer"
cache_dir = pooch.os_cache("mesmer")
12+
13+
# NOTE: the pooch object is re-created on every call of this function
REMOTE_RESSOURCE = pooch.create(
14+
path=cache_dir,
15+
# The remote data is hosted on GitHub; ``{version}`` in the URL is a
# placeholder for the ``version`` / ``version_dev`` arguments below
16+
base_url="https://github.com/MESMER-group/mesmer/raw/{version}/data/",
17+
# file name -> expected hash of the file (presumably SHA256 - TODO confirm)
registry={
18+
"isaod_gl_2022.dat": "3d26e78bf0ee96a02c99e2a7a448dafda0ac847a5c914a75c7d9745e95fe68ee",
19+
},
20+
# the version string is the installed mesmer version prefixed with "v"
version=f"v{mesmer.__version__}",
21+
# NOTE(review): presumably "main" replaces the version for development
# versions of mesmer - confirm against the pooch documentation
version_dev="main",
22+
)
23+
24+
# the file will be downloaded automatically the first time this is run.
25+
return REMOTE_RESSOURCE.fetch(name)

mesmer/io/load_obs.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
"""
88

99
import os
10+
import warnings
1011

1112
import numpy as np
1213
import pandas as pd
1314
import xarray as xr
1415

16+
from mesmer.core._data import fetch_remote_data
17+
1518

1619
def load_obs(targ, prod, lon, lat, cfg, sel_ref="native", ignore_nans=True):
1720
"""Load observations which you previously downloaded.
@@ -163,15 +166,15 @@ def load_obs_tblend(prod, lon, lat, cfg, sel_ref):
163166
return tblend, time
164167

165168

166-
def load_strat_aod(time, dir_obs):
169+
def load_strat_aod(time, dir_obs=None):
167170
"""Load observed global stratospheric aerosol optical depth time series.
168171
169172
Parameters
170173
----------
171174
time : np.ndarray
172175
1d array of years the AOD time series is required for
173-
dir_obs : str
174-
pathway to observations
176+
dir_obs : None
177+
Deprecated.
175178
176179
Returns
177180
-------
@@ -184,9 +187,16 @@ def load_strat_aod(time, dir_obs):
184187
cmip6, 1850 - 2005 for cmip5)
185188
"""
186189

187-
path_file = os.path.join(dir_obs, "aerosols", "isaod_gl.dat")
190+
if dir_obs is not None:
191+
warnings.warn(
192+
"The aerosol data is now shipped with mesmer. Passing `dir_obs` to "
193+
"``load_strat_aod`` is no longer necessary",
194+
FutureWarning,
195+
)
196+
197+
filename = fetch_remote_data("isaod_gl_2022.dat")
188198
df = pd.read_csv(
189-
path_file,
199+
filename,
190200
delim_whitespace=True,
191201
skiprows=11,
192202
names=("year", "month", "AOD"),

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ install_requires =
3636
numpy
3737
packaging
3838
pandas < 2.0
39+
pooch
3940
regionmask
4041
scikit-learn
4142
statsmodels

0 commit comments

Comments
 (0)