Skip to content

Commit 81cd70b

Browse files
authored
Merge pull request #7 from hrhampapura/harsha
Update notebook
2 parents 560b91a + 835226a commit 81cd70b

File tree

1 file changed

+120
-83
lines changed

1 file changed

+120
-83
lines changed

notebooks/01_data_preprocessing.ipynb

+120-83
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
"- If you don't have write permission to save to the Research Data Archive (RDA), please save the result to your local folder.\n",
2020
"- If you need annual means for the following variables, please don't run this notebook. These annual means have already been calculated and can be accessed via HTTPS from https://data.rda.ucar.edu/pythia_era5_24/annual_means/\n",
2121
"\n",
22-
" - Air temperature at 2 m/ VAR_2T\n",
22+
" 1) Air temperature at 2 m/ VAR_2T\n",
2323
" \n",
2424
"- Otherwise, please run this script once to generate the annual means.\n"
2525
]
@@ -64,15 +64,15 @@
6464
},
6565
{
6666
"cell_type": "code",
67-
"execution_count": 4,
67+
"execution_count": 18,
6868
"id": "4bee4557-d1f1-4720-bf61-a09f106f41ba",
6969
"metadata": {},
7070
"outputs": [
7171
{
7272
"name": "stdout",
7373
"output_type": "stream",
7474
"text": [
75-
"https://data.rda.ucar.edu/pythia_era5_24/annual_means/\n"
75+
"/gpfs/csfs1/collections/rda/data/pythia_era5_24/annual_means/\n"
7676
]
7777
}
7878
],
@@ -96,7 +96,7 @@
9696
"id": "20a20dff-a028-4e38-a7b7-8bfb670bdf01",
9797
"metadata": {},
9898
"source": [
99-
"### Create a Dask cluster"
99+
"## Create a Dask cluster"
100100
]
101101
},
102102
{
@@ -140,11 +140,13 @@
140140
]
141141
},
142142
{
143-
"cell_type": "markdown",
144-
"id": "1a995e3d-2be7-414e-a7bf-7c53178d44d2",
143+
"cell_type": "code",
144+
"execution_count": 12,
145+
"id": "f842eb2a-1eab-4991-b75b-1d996a5b9006",
145146
"metadata": {},
147+
"outputs": [],
146148
"source": [
147-
"USE_PBS_SCHEDULER = False"
149+
"USE_PBS_SCHEDULER = True"
148150
]
149151
},
150152
{
@@ -156,9 +158,11 @@
156158
]
157159
},
158160
{
159-
"cell_type": "markdown",
160-
"id": "8df9739b-5005-4c0d-bf5e-4dc4cc432f50",
161+
"cell_type": "code",
162+
"execution_count": 13,
163+
"id": "0993ff8d-9f8d-4e18-be23-44eb9db2a92a",
161164
"metadata": {},
165+
"outputs": [],
162166
"source": [
163167
"USE_DASK_GATEWAY = False"
164168
]
@@ -168,81 +172,117 @@
168172
"id": "5fa1388b-d977-42f4-878f-46dd2cdee653",
169173
"metadata": {},
170174
"source": [
171-
"## Spin up a PBS cluster"
175+
"**Python function for a PBS cluster**"
172176
]
173177
},
174178
{
175179
"cell_type": "code",
176-
"execution_count": 26,
180+
"execution_count": 14,
177181
"id": "d1421ec6-4b2f-46be-aba2-6db5943317ae",
178182
"metadata": {},
179183
"outputs": [],
180184
"source": [
181185
"# Create a PBS cluster object\n",
182-
"cluster = PBSCluster(\n",
183-
" job_name = 'dask-wk24-hpc',\n",
184-
" cores = 1,\n",
185-
" memory = '8GiB',\n",
186-
" processes = 1,\n",
187-
" local_directory = rda_scratch+'/dask/spill',\n",
188-
" log_directory = rda_scratch +'/dask/',\n",
189-
" resource_spec = 'select=1:ncpus=1:mem=8GB',\n",
190-
" queue = 'casper',\n",
191-
" walltime = '3:30:00',\n",
192-
" #interface = 'ib0'\n",
193-
" interface = 'ext'\n",
194-
")"
186+
"def get_pbs_cluster():\n",
187+
" \"\"\" Create cluster through dask_jobqueue. \n",
188+
" \"\"\"\n",
189+
" from dask_jobqueue import PBSCluster\n",
190+
" cluster = PBSCluster(\n",
191+
" job_name = 'dask-pythia-24',\n",
192+
" cores = 1,\n",
193+
" memory = '4GiB',\n",
194+
" processes = 1,\n",
195+
" local_directory = rda_scratch + '/dask/spill',\n",
196+
" resource_spec = 'select=1:ncpus=1:mem=8GB',\n",
197+
" queue = 'casper',\n",
198+
" walltime = '1:00:00',\n",
199+
" #interface = 'ib0'\n",
200+
" interface = 'ext'\n",
201+
" )\n",
202+
" return cluster"
203+
]
204+
},
205+
{
206+
"cell_type": "markdown",
207+
"id": "ea025c5b-5fb0-4fd2-aed1-a9df881f400e",
208+
"metadata": {},
209+
"source": [
210+
"**Python function for a Gateway Cluster**"
211+
]
212+
},
213+
{
214+
"cell_type": "code",
215+
"execution_count": 15,
216+
"id": "d4329409-77e2-463f-8e59-d882354560f3",
217+
"metadata": {},
218+
"outputs": [],
219+
"source": [
220+
"def get_gateway_cluster():\n",
221+
" \"\"\" Create cluster through dask_gateway\n",
222+
" \"\"\"\n",
223+
" from dask_gateway import Gateway\n",
224+
"\n",
225+
" gateway = Gateway()\n",
226+
" cluster = gateway.new_cluster()\n",
227+
" cluster.adapt(minimum=2, maximum=4)\n",
228+
" return cluster"
229+
]
230+
},
231+
{
232+
"cell_type": "markdown",
233+
"id": "6a28f49a-754f-4a68-95f0-4ff6f595f9b5",
234+
"metadata": {},
235+
"source": [
236+
"**Python function for a Local Cluster**"
237+
]
238+
},
239+
{
240+
"cell_type": "code",
241+
"execution_count": 16,
242+
"id": "1fd3d476-5002-43b0-93ef-a2aed6182cba",
243+
"metadata": {},
244+
"outputs": [],
245+
"source": [
246+
"def get_local_cluster():\n",
247+
" \"\"\" Create cluster using the Jupyter server's resources\n",
248+
" \"\"\"\n",
249+
" from distributed import LocalCluster, performance_report\n",
250+
" cluster = LocalCluster() \n",
251+
"\n",
252+
" cluster.scale(4)\n",
253+
" return cluster"
254+
]
255+
},
256+
{
257+
"cell_type": "markdown",
258+
"id": "7a2a2abb-4ac3-4061-8270-f988ba1a820e",
259+
"metadata": {},
260+
"source": [
261+
"**Python logic to select the Dask Cluster type**\n",
262+
"\n",
263+
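"The cluster type is selected using the `USE_PBS_SCHEDULER` and `USE_DASK_GATEWAY` flags set in the previous cells: the PBS cluster is used if `USE_PBS_SCHEDULER` is `True`, otherwise the Gateway cluster if `USE_DASK_GATEWAY` is `True`, and a local cluster if both are `False`."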
195264
]
196265
},
197266
{
198267
"cell_type": "code",
199-
"execution_count": 27,
200-
"id": "8d0afb64-5b20-4242-8fa7-3eae60ac0a15",
268+
"execution_count": 19,
269+
"id": "f92bbbf8-f507-43a3-95d0-9f1530e14bc3",
201270
"metadata": {},
202271
"outputs": [
203272
{
204273
"data": {
274+
"application/vnd.jupyter.widget-view+json": {
275+
"model_id": "3f7db0463f0b4228aa6a6c45766bfdbb",
276+
"version_major": 2,
277+
"version_minor": 0
278+
},
205279
"text/html": [
206-
"<div>\n",
207-
" <div style=\"width: 24px; height: 24px; background-color: #e1e1e1; border: 3px solid #9D9D9D; border-radius: 5px; position: absolute;\"> </div>\n",
208-
" <div style=\"margin-left: 48px;\">\n",
209-
" <h3 style=\"margin-bottom: 0px;\">Client</h3>\n",
210-
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Client-974c44c9-29a9-11ef-8521-3cecef19f78e</p>\n",
211-
" <table style=\"width: 100%; text-align: left;\">\n",
212-
"\n",
213-
" <tr>\n",
214-
" \n",
215-
" <td style=\"text-align: left;\"><strong>Connection method:</strong> Cluster object</td>\n",
216-
" <td style=\"text-align: left;\"><strong>Cluster type:</strong> dask_jobqueue.PBSCluster</td>\n",
217-
" \n",
218-
" </tr>\n",
219-
"\n",
220-
" \n",
221-
" <tr>\n",
222-
" <td style=\"text-align: left;\">\n",
223-
" <strong>Dashboard: </strong> <a href=\"https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status\" target=\"_blank\">https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status</a>\n",
224-
" </td>\n",
225-
" <td style=\"text-align: left;\"></td>\n",
226-
" </tr>\n",
227-
" \n",
228-
"\n",
229-
" </table>\n",
230-
"\n",
231-
" \n",
232-
" <button style=\"margin-bottom: 12px;\" data-commandlinker-command=\"dask:populate-and-launch-layout\" data-commandlinker-args='{\"url\": \"https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status\" }'>\n",
233-
" Launch dashboard in JupyterLab\n",
234-
" </button>\n",
235-
" \n",
236-
"\n",
237-
" \n",
238-
" <details>\n",
239-
" <summary style=\"margin-bottom: 20px;\"><h3 style=\"display: inline;\">Cluster Info</h3></summary>\n",
240-
" <div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-output\">\n",
280+
"<div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-output\">\n",
241281
" <div style=\"width: 24px; height: 24px; background-color: #e1e1e1; border: 3px solid #9D9D9D; border-radius: 5px; position: absolute;\">\n",
242282
" </div>\n",
243283
" <div style=\"margin-left: 48px;\">\n",
244284
" <h3 style=\"margin-bottom: 0px; margin-top: 0px;\">PBSCluster</h3>\n",
245-
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">f9b25755</p>\n",
285+
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">ac6b2948</p>\n",
246286
" <table style=\"width: 100%; text-align: left;\">\n",
247287
" <tr>\n",
248288
" <td style=\"text-align: left;\">\n",
@@ -273,11 +313,11 @@
273313
" <div style=\"width: 24px; height: 24px; background-color: #FFF7E5; border: 3px solid #FF6132; border-radius: 5px; position: absolute;\"> </div>\n",
274314
" <div style=\"margin-left: 48px;\">\n",
275315
" <h3 style=\"margin-bottom: 0px;\">Scheduler</h3>\n",
276-
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Scheduler-dc88a893-99f9-43d8-8e7b-b6d0892f6fca</p>\n",
316+
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Scheduler-33cb717b-300e-41e2-b722-a4b6cf279453</p>\n",
277317
" <table style=\"width: 100%; text-align: left;\">\n",
278318
" <tr>\n",
279319
" <td style=\"text-align: left;\">\n",
280-
" <strong>Comm:</strong> tcp://128.117.208.94:33609\n",
320+
" <strong>Comm:</strong> tcp://128.117.208.94:38149\n",
281321
" </td>\n",
282322
" <td style=\"text-align: left;\">\n",
283323
" <strong>Workers:</strong> 0\n",
@@ -315,35 +355,32 @@
315355
"\n",
316356
" </details>\n",
317357
" </div>\n",
318-
"</div>\n",
319-
" </details>\n",
320-
" \n",
321-
"\n",
322-
" </div>\n",
323358
"</div>"
324359
],
325360
"text/plain": [
326-
"<Client: 'tcp://128.117.208.94:33609' processes=0 threads=0, memory=0 B>"
361+
"PBSCluster(ac6b2948, 'tcp://128.117.208.94:38149', workers=0, threads=0, memory=0 B)"
327362
]
328363
},
329-
"execution_count": 27,
330364
"metadata": {},
331-
"output_type": "execute_result"
365+
"output_type": "display_data"
332366
}
333367
],
334368
"source": [
369+
"# Obtain dask cluster in one of three ways\n",
370+
"\n",
371+
"if USE_PBS_SCHEDULER:\n",
372+
" cluster = get_pbs_cluster()\n",
373+
"elif USE_DASK_GATEWAY:\n",
374+
" cluster = get_gateway_cluster()\n",
375+
"else:\n",
376+
" cluster = get_local_cluster()\n",
377+
"\n",
378+
"# Connect to cluster\n",
379+
"from distributed import Client\n",
335380
"client = Client(cluster)\n",
336-
"client"
337-
]
338-
},
339-
{
340-
"cell_type": "code",
341-
"execution_count": 28,
342-
"id": "d87c34fb-165a-4f48-89ea-ac6ab4f0c1be",
343-
"metadata": {},
344-
"outputs": [],
345-
"source": [
346-
"cluster.scale(30)"
381+
"\n",
382+
"# Display cluster dashboard URL\n",
383+
"cluster"
347384
]
348385
},
349386
{

0 commit comments

Comments
 (0)