Skip to content

Commit fa97b64

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 3c74682 commit fa97b64

11 files changed

+622
-379
lines changed

docs/drift_removal.ipynb

Lines changed: 92 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
],
4444
"source": [
4545
"from dask_gateway import Gateway\n",
46+
"\n",
4647
"g = Gateway()\n",
4748
"running_clusters = g.list_clusters()\n",
4849
"print(running_clusters)\n",
@@ -77,6 +78,7 @@
7778
"source": [
7879
"from distributed import Client\n",
7980
"from dask_gateway import GatewayCluster\n",
81+
"\n",
8082
"cluster = GatewayCluster()\n",
8183
"cluster.scale(30)\n",
8284
"cluster"
@@ -255,18 +257,22 @@
255257
}
256258
],
257259
"source": [
258-
"zkwargs = {'consolidated':True, 'use_cftime':True}\n",
259-
"kwargs = {'zarr_kwargs':zkwargs, 'preprocess':combined_preprocessing, 'aggregate':False}\n",
260+
"zkwargs = {\"consolidated\": True, \"use_cftime\": True}\n",
261+
"kwargs = {\n",
262+
" \"zarr_kwargs\": zkwargs,\n",
263+
" \"preprocess\": combined_preprocessing,\n",
264+
" \"aggregate\": False,\n",
265+
"}\n",
260266
"\n",
261267
"col = google_cmip_col()\n",
262268
"\n",
263269
"\n",
264-
"cat = col.search(source_id='CanESM5-CanOE', variable_id='thetao')\n",
270+
"cat = col.search(source_id=\"CanESM5-CanOE\", variable_id=\"thetao\")\n",
265271
"\n",
266272
"\n",
267-
"ddict_historical = cat.search(experiment_id='historical').to_dataset_dict(**kwargs)\n",
268-
"ddict_ssp585 = cat.search(experiment_id='ssp585').to_dataset_dict(**kwargs)\n",
269-
"ddict_picontrol = cat.search(experiment_id='piControl').to_dataset_dict(**kwargs)"
273+
"ddict_historical = cat.search(experiment_id=\"historical\").to_dataset_dict(**kwargs)\n",
274+
"ddict_ssp585 = cat.search(experiment_id=\"ssp585\").to_dataset_dict(**kwargs)\n",
275+
"ddict_picontrol = cat.search(experiment_id=\"piControl\").to_dataset_dict(**kwargs)"
270276
]
271277
},
272278
{
@@ -286,12 +292,18 @@
286292
"metadata": {},
287293
"outputs": [],
288294
"source": [
289-
"ds_control = ddict_picontrol['CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
290-
"ds_historical = ddict_historical['CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
291-
"ds_ssp585 = ddict_ssp585['ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429']\n",
295+
"ds_control = ddict_picontrol[\n",
296+
" \"CMIP.CCCma.CanESM5-CanOE.piControl.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/piControl/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
297+
"]\n",
298+
"ds_historical = ddict_historical[\n",
299+
" \"CMIP.CCCma.CanESM5-CanOE.historical.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/CMIP/CCCma/CanESM5-CanOE/historical/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
300+
"]\n",
301+
"ds_ssp585 = ddict_ssp585[\n",
302+
" \"ScenarioMIP.CCCma.CanESM5-CanOE.ssp585.r1i1p2f1.Omon.thetao.gn.gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5-CanOE/ssp585/r1i1p2f1/Omon/thetao/gn/v20190429/.nan.20190429\"\n",
303+
"]\n",
292304
"\n",
293305
"# Pick a random location in x/y/z space to use as an example\n",
294-
"roi = {'x':100,'y':220, 'lev':30}"
306+
"roi = {\"x\": 100, \"y\": 220, \"lev\": 30}"
295307
]
296308
},
297309
{
@@ -335,8 +347,8 @@
335347
],
336348
"source": [
337349
"# ok let's just plot them together\n",
338-
"ds_control.isel(**roi).thetao.plot(color='0.5')\n",
339-
"ds_historical.isel(**roi).thetao.plot(color='C1')"
350+
"ds_control.isel(**roi).thetao.plot(color=\"0.5\")\n",
351+
"ds_historical.isel(**roi).thetao.plot(color=\"C1\")"
340352
]
341353
},
342354
{
@@ -373,7 +385,7 @@
373385
}
374386
],
375387
"source": [
376-
"{k:v for k,v in ds_historical.attrs.items() if 'parent' in k}"
388+
"{k: v for k, v in ds_historical.attrs.items() if \"parent\" in k}"
377389
]
378390
},
379391
{
@@ -428,8 +440,8 @@
428440
],
429441
"source": [
430442
"# ok let's just plot them together\n",
431-
"ds_control_adj.isel(**roi).thetao.plot(color='0.5')\n",
432-
"ds_historical_adj.isel(**roi).thetao.plot(color='C1')"
443+
"ds_control_adj.isel(**roi).thetao.plot(color=\"0.5\")\n",
444+
"ds_historical_adj.isel(**roi).thetao.plot(color=\"C1\")"
433445
]
434446
},
435447
{
@@ -471,8 +483,8 @@
471483
],
472484
"source": [
473485
"# ok let's just plot them together\n",
474-
"ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
475-
"ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
486+
"ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
487+
"ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
476488
]
477489
},
478490
{
@@ -491,8 +503,9 @@
491503
"outputs": [],
492504
"source": [
493505
"from xmip.drift_removal import replace_time\n",
506+
"\n",
494507
"# with the defaults it will just replace the dates with new ones which have time stamps at the beginning of the month.\n",
495-
"ds_historical_adj = replace_time(ds_historical_adj) "
508+
"ds_historical_adj = replace_time(ds_historical_adj)"
496509
]
497510
},
498511
{
@@ -526,8 +539,8 @@
526539
],
527540
"source": [
528541
"# ok let's just plot them together again\n",
529-
"ds_control_adj.isel(**roi, time=slice(0,24)).thetao.plot()\n",
530-
"ds_historical_adj.isel(**roi, time=slice(0,24)).thetao.plot()"
542+
"ds_control_adj.isel(**roi, time=slice(0, 24)).thetao.plot()\n",
543+
"ds_historical_adj.isel(**roi, time=slice(0, 24)).thetao.plot()"
531544
]
532545
},
533546
{
@@ -564,7 +577,7 @@
564577
],
565578
"source": [
566579
"for name, ds in ddict_historical.items():\n",
567-
" print(name, ds.attrs['branch_time_in_parent'])"
580+
" print(name, ds.attrs[\"branch_time_in_parent\"])"
568581
]
569582
},
570583
{
@@ -598,14 +611,18 @@
598611
"# replace the timestamp with the first of the month for the control run and plot\n",
599612
"# we will also average the data yearly to remove some of the visual noise\n",
600613
"\n",
601-
"plt.figure(figsize=[12,4])\n",
602-
"replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,150*4)).plot(color='0.5')\n",
614+
"plt.figure(figsize=[12, 4])\n",
615+
"replace_time(ds_control).isel(**roi).thetao.coarsen(time=3).mean().isel(\n",
616+
" time=slice(0, 150 * 4)\n",
617+
").plot(color=\"0.5\")\n",
603618
"\n",
604-
"# now we loop through all the historical members, adjust the time and plot them in the same way, \n",
619+
"# now we loop through all the historical members, adjust the time and plot them in the same way,\n",
605620
"# but only for the first 20 years\n",
606621
"for name, ds in ddict_historical.items():\n",
607-
" _, ds_adj = unify_time(ds_control, ds, adjust_to='parent')\n",
608-
" ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0,30*4)).plot(color='C1')"
622+
" _, ds_adj = unify_time(ds_control, ds, adjust_to=\"parent\")\n",
623+
" ds_adj.isel(**roi).thetao.coarsen(time=3).mean().isel(time=slice(0, 30 * 4)).plot(\n",
624+
" color=\"C1\"\n",
625+
" )"
609626
]
610627
},
611628
{
@@ -654,9 +671,10 @@
654671
"# setting up the scratch bucket\n",
655672
"import os\n",
656673
"import fsspec\n",
657-
"PANGEO_SCRATCH = os.environ['PANGEO_SCRATCH']+'cmip6_pp_demo'\n",
658-
"path = f'{PANGEO_SCRATCH}/test_rechunked.zarr'\n",
659-
"temp_path = f'{PANGEO_SCRATCH}/test_rechunked_temp.zarr'\n",
674+
"\n",
675+
"PANGEO_SCRATCH = os.environ[\"PANGEO_SCRATCH\"] + \"cmip6_pp_demo\"\n",
676+
"path = f\"{PANGEO_SCRATCH}/test_rechunked.zarr\"\n",
677+
"temp_path = f\"{PANGEO_SCRATCH}/test_rechunked_temp.zarr\"\n",
660678
"mapper = fsspec.get_mapper(path)\n",
661679
"mapper_temp = fsspec.get_mapper(temp_path)"
662680
]
@@ -1506,27 +1524,30 @@
15061524
"source": [
15071525
"if not mapper.fs.exists(path):\n",
15081526
" # recompute the rechunked data into the scratch bucket (is only triggered when the temporary store was erased)\n",
1509-
" \n",
1527+
"\n",
15101528
" # Remove the temp store if for some reason that still exists\n",
15111529
" if mapper.fs.exists(temp_path):\n",
15121530
" mapper.fs.rm(temp_path, recursive=True)\n",
15131531
" from rechunker import rechunk\n",
1532+
"\n",
15141533
" target_chunks = {\n",
1515-
" 'thetao': {'time':6012, 'lev':1, 'x':3, 'y':291},\n",
1516-
" 'x': {'x':3},\n",
1517-
" 'y': {'y':291},\n",
1518-
" 'lat': {'x':3, 'y':291},\n",
1519-
" 'lev': {'lev':1},\n",
1520-
" 'lon': {'x':3, 'y':291},\n",
1521-
" 'time': {'time':6012}, \n",
1534+
" \"thetao\": {\"time\": 6012, \"lev\": 1, \"x\": 3, \"y\": 291},\n",
1535+
" \"x\": {\"x\": 3},\n",
1536+
" \"y\": {\"y\": 291},\n",
1537+
" \"lat\": {\"x\": 3, \"y\": 291},\n",
1538+
" \"lev\": {\"lev\": 1},\n",
1539+
" \"lon\": {\"x\": 3, \"y\": 291},\n",
1540+
" \"time\": {\"time\": 6012},\n",
15221541
" }\n",
1523-
" max_mem = '1GB'\n",
1542+
" max_mem = \"1GB\"\n",
15241543
"\n",
1525-
" array_plan = rechunk(ds_control[['thetao']], target_chunks, max_mem, mapper, temp_store=mapper_temp)\n",
1544+
" array_plan = rechunk(\n",
1545+
" ds_control[[\"thetao\"]], target_chunks, max_mem, mapper, temp_store=mapper_temp\n",
1546+
" )\n",
15261547
" array_plan.execute(retries=10)\n",
1527-
" \n",
1548+
"\n",
15281549
"ds_control_rechunked = xr.open_zarr(mapper, use_cftime=True)\n",
1529-
"ds_control_rechunked "
1550+
"ds_control_rechunked"
15301551
]
15311552
},
15321553
{
@@ -2061,8 +2082,8 @@
20612082
}
20622083
],
20632084
"source": [
2064-
"drift = calculate_drift(ds_control_rechunked, ds_historical, 'thetao') \n",
2065-
"drift = drift.load() # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
2085+
"drift = calculate_drift(ds_control_rechunked, ds_historical, \"thetao\")\n",
2086+
"drift = drift.load() # This takes a bit, but it is worth loading this small output to avoid repeated computation\n",
20662087
"drift"
20672088
]
20682089
},
@@ -2116,13 +2137,18 @@
21162137
"source": [
21172138
"start = drift.trend_time_range.isel(bnds=0).data.tolist()\n",
21182139
"stop = drift.trend_time_range.isel(bnds=1).data.tolist()\n",
2119-
"time = xr.cftime_range(start, stop, freq='1MS')\n",
2140+
"time = xr.cftime_range(start, stop, freq=\"1MS\")\n",
21202141
"\n",
21212142
"# cut the control to the time over which the trend was calculated\n",
21222143
"ds_control_cut = ds_control_rechunked.sel(time=slice(start, stop))\n",
21232144
"\n",
21242145
"# use the linear slope from the same point to construct a trendline\n",
2125-
"trendline = xr.DataArray((np.arange(len(time)) * drift.thetao.isel(**roi).data) + ds_control_cut.thetao.isel(**roi, time=0).data, dims=['time'], coords={'time':time})"
2146+
"trendline = xr.DataArray(\n",
2147+
" (np.arange(len(time)) * drift.thetao.isel(**roi).data)\n",
2148+
" + ds_control_cut.thetao.isel(**roi, time=0).data,\n",
2149+
" dims=[\"time\"],\n",
2150+
" coords={\"time\": time},\n",
2151+
")"
21262152
]
21272153
},
21282154
{
@@ -2205,7 +2231,9 @@
22052231
}
22062232
],
22072233
"source": [
2208-
"control_detrended = remove_trend(ds_control, drift, 'thetao', ref_date=str(ds_control.time.data[0]))\n",
2234+
"control_detrended = remove_trend(\n",
2235+
" ds_control, drift, \"thetao\", ref_date=str(ds_control.time.data[0])\n",
2236+
")\n",
22092237
"control_detrended.isel(**roi).plot()"
22102238
]
22112239
},
@@ -2247,7 +2275,9 @@
22472275
}
22482276
],
22492277
"source": [
2250-
"ds_historical_dedrifted = remove_trend(ds_historical, drift, 'thetao', ref_date=str(ds_historical.time.data[0]))\n",
2278+
"ds_historical_dedrifted = remove_trend(\n",
2279+
" ds_historical, drift, \"thetao\", ref_date=str(ds_historical.time.data[0])\n",
2280+
")\n",
22512281
"ds_historical_dedrifted.isel(**roi).plot()"
22522282
]
22532283
},
@@ -2866,7 +2896,7 @@
28662896
}
28672897
],
28682898
"source": [
2869-
"ds_historical_dedrifted.attrs['drift_removed']"
2899+
"ds_historical_dedrifted.attrs[\"drift_removed\"]"
28702900
]
28712901
},
28722902
{
@@ -2930,10 +2960,10 @@
29302960
"ds_ssp585_dedrifted = remove_trend(\n",
29312961
" ds_ssp585,\n",
29322962
" drift,\n",
2933-
" 'thetao',\n",
2934-
" ref_date=str(ds_historical.time.data[0]) \n",
2935-
" # Note that the ref_date is still the first time point of the *historical*run. \n",
2936-
" # This ensures that the scenario is treated as an extension of the historical \n",
2963+
" \"thetao\",\n",
2964+
" ref_date=str(ds_historical.time.data[0]),\n",
2965+
"    # Note that the ref_date is still the first time point of the *historical* run.\n",
2966+
" # This ensures that the scenario is treated as an extension of the historical\n",
29372967
" # run and the offset is calculated appropriately\n",
29382968
")"
29392969
]
@@ -2968,10 +2998,16 @@
29682998
}
29692999
],
29703000
"source": [
2971-
"ds_historical.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0', label='raw data')\n",
2972-
"ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary='trim').mean().plot(color='C0')\n",
2973-
"ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1', label='control drift removed')\n",
2974-
"ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary='trim').mean().plot(color='C1')"
3001+
"ds_historical.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(\n",
3002+
" color=\"C0\", label=\"raw data\"\n",
3003+
")\n",
3004+
"ds_ssp585.isel(**roi).thetao.coarsen(time=36, boundary=\"trim\").mean().plot(color=\"C0\")\n",
3005+
"ds_historical_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
3006+
" color=\"C1\", label=\"control drift removed\"\n",
3007+
")\n",
3008+
"ds_ssp585_dedrifted.isel(**roi).coarsen(time=36, boundary=\"trim\").mean().plot(\n",
3009+
" color=\"C1\"\n",
3010+
")"
29753011
]
29763012
},
29773013
{

0 commit comments

Comments
 (0)