@@ -50,6 +50,7 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
50
50
cluster setup queue, a list of all existing clusters, and the user's working namespace.
51
51
"""
52
52
53
+ import re
53
54
from time import sleep
54
55
from typing import List, Optional, Tuple, Dict
55
56
@@ -73,11 +74,13 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
73
74
RayClusterStatus,
74
75
)
75
76
from kubernetes import client, config
77
+ from kubernetes.utils import parse_quantity
76
78
import yaml
77
79
import os
78
80
import requests
79
81
80
82
from kubernetes import config
83
+ from kubernetes.client.rest import ApiException
81
84
82
85
83
86
class Cluster:
@@ -216,6 +219,7 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
216
219
write_to_file = self.config.write_to_file
217
220
verify_tls = self.config.verify_tls
218
221
local_queue = self.config.local_queue
222
+ labels = self.config.labels
219
223
return generate_appwrapper(
220
224
name=name,
221
225
namespace=namespace,
@@ -240,6 +244,7 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
240
244
write_to_file=write_to_file,
241
245
verify_tls=verify_tls,
242
246
local_queue=local_queue,
247
+ labels=labels,
243
248
)
244
249
245
250
# creates a new cluster with the provided or default spec
@@ -248,6 +253,10 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
248
253
Applies the AppWrapper yaml, pushing the resource request onto
249
254
the MCAD queue.
250
255
"""
256
+
257
+ # check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
258
+ self._throw_for_no_raycluster()
259
+
251
260
namespace = self.config.namespace
252
261
253
262
try:
@@ -278,12 +287,32 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
278
287
except Exception as e: # pragma: no cover
279
288
return _kube_api_error_handling(e)
280
289
290
+ def _throw_for_no_raycluster(self):
291
+ api_instance = client.CustomObjectsApi(api_config_handler())
292
+ try:
293
+ api_instance.list_namespaced_custom_object(
294
+ group="ray.io",
295
+ version="v1",
296
+ namespace=self.config.namespace,
297
+ plural="rayclusters",
298
+ )
299
+ except ApiException as e:
300
+ if e.status == 404:
301
+ raise RuntimeError(
302
+ "RayCluster CustomResourceDefinition unavailable contact your administrator."
303
+ )
304
+ else:
305
+ raise RuntimeError(
306
+ "Failed to get RayCluster CustomResourceDefinition: " + str(e)
307
+ )
308
+
281
309
def down(self):
282
310
"""
283
311
Deletes the AppWrapper yaml, scaling-down and deleting all resources
284
312
associated with the cluster.
285
313
"""
286
314
namespace = self.config.namespace
315
+ self._throw_for_no_raycluster()
287
316
try:
288
317
config_check()
289
318
api_instance = client.CustomObjectsApi(api_config_handler())
@@ -520,26 +549,18 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
520
549
namespace=rc["metadata"]["namespace"],
521
550
machine_types=machine_types,
522
551
num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"],
523
- min_cpus=int(
524
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
525
- "resources"
526
- ]["requests"]["cpu"]
527
- ),
528
- max_cpus=int(
529
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
530
- "resources"
531
- ]["limits"]["cpu"]
532
- ),
533
- min_memory=int(
534
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
535
- "resources"
536
- ]["requests"]["memory"][:-1]
537
- ),
538
- max_memory=int(
539
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
540
- "resources"
541
- ]["limits"]["memory"][:-1]
542
- ),
552
+ min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
553
+ "containers"
554
+ ][0]["resources"]["requests"]["cpu"],
555
+ max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
556
+ "containers"
557
+ ][0]["resources"]["limits"]["cpu"],
558
+ min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
559
+ "containers"
560
+ ][0]["resources"]["requests"]["memory"],
561
+ max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
562
+ "containers"
563
+ ][0]["resources"]["limits"]["memory"],
543
564
num_gpus=int(
544
565
rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
545
566
"resources"
@@ -1265,6 +1286,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
1265
1286
write_to_file = self.config.write_to_file
1266
1287
verify_tls = self.config.verify_tls
1267
1288
local_queue = self.config.local_queue
1289
+ labels = self.config.labels
1268
1290
return generate_appwrapper(
1269
1291
name=name,
1270
1292
namespace=namespace,
@@ -1289,6 +1311,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
1289
1311
write_to_file=write_to_file,
1290
1312
verify_tls=verify_tls,
1291
1313
local_queue=local_queue,
1314
+ labels=labels,
1292
1315
)
1293
1316
1294
1317
# creates a new cluster with the provided or default spec
@@ -1297,6 +1320,10 @@ <h2 class="section-title" id="header-classes">Classes</h2>
1297
1320
Applies the AppWrapper yaml, pushing the resource request onto
1298
1321
the MCAD queue.
1299
1322
"""
1323
+
1324
+ # check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
1325
+ self._throw_for_no_raycluster()
1326
+
1300
1327
namespace = self.config.namespace
1301
1328
1302
1329
try:
@@ -1327,12 +1354,32 @@ <h2 class="section-title" id="header-classes">Classes</h2>
1327
1354
except Exception as e: # pragma: no cover
1328
1355
return _kube_api_error_handling(e)
1329
1356
1357
+ def _throw_for_no_raycluster(self):
1358
+ api_instance = client.CustomObjectsApi(api_config_handler())
1359
+ try:
1360
+ api_instance.list_namespaced_custom_object(
1361
+ group="ray.io",
1362
+ version="v1",
1363
+ namespace=self.config.namespace,
1364
+ plural="rayclusters",
1365
+ )
1366
+ except ApiException as e:
1367
+ if e.status == 404:
1368
+ raise RuntimeError(
1369
+ "RayCluster CustomResourceDefinition unavailable contact your administrator."
1370
+ )
1371
+ else:
1372
+ raise RuntimeError(
1373
+ "Failed to get RayCluster CustomResourceDefinition: " + str(e)
1374
+ )
1375
+
1330
1376
def down(self):
1331
1377
"""
1332
1378
Deletes the AppWrapper yaml, scaling-down and deleting all resources
1333
1379
associated with the cluster.
1334
1380
"""
1335
1381
namespace = self.config.namespace
1382
+ self._throw_for_no_raycluster()
1336
1383
try:
1337
1384
config_check()
1338
1385
api_instance = client.CustomObjectsApi(api_config_handler())
@@ -1569,26 +1616,18 @@ <h2 class="section-title" id="header-classes">Classes</h2>
1569
1616
namespace=rc["metadata"]["namespace"],
1570
1617
machine_types=machine_types,
1571
1618
num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"],
1572
- min_cpus=int(
1573
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1574
- "resources"
1575
- ]["requests"]["cpu"]
1576
- ),
1577
- max_cpus=int(
1578
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1579
- "resources"
1580
- ]["limits"]["cpu"]
1581
- ),
1582
- min_memory=int(
1583
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1584
- "resources"
1585
- ]["requests"]["memory"][:-1]
1586
- ),
1587
- max_memory=int(
1588
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1589
- "resources"
1590
- ]["limits"]["memory"][:-1]
1591
- ),
1619
+ min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1620
+ "containers"
1621
+ ][0]["resources"]["requests"]["cpu"],
1622
+ max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1623
+ "containers"
1624
+ ][0]["resources"]["limits"]["cpu"],
1625
+ min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1626
+ "containers"
1627
+ ][0]["resources"]["requests"]["memory"],
1628
+ max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1629
+ "containers"
1630
+ ][0]["resources"]["limits"]["memory"],
1592
1631
num_gpus=int(
1593
1632
rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1594
1633
"resources"
@@ -1811,6 +1850,7 @@ <h3>Methods</h3>
1811
1850
write_to_file = self.config.write_to_file
1812
1851
verify_tls = self.config.verify_tls
1813
1852
local_queue = self.config.local_queue
1853
+ labels = self.config.labels
1814
1854
return generate_appwrapper(
1815
1855
name=name,
1816
1856
namespace=namespace,
@@ -1835,6 +1875,7 @@ <h3>Methods</h3>
1835
1875
write_to_file=write_to_file,
1836
1876
verify_tls=verify_tls,
1837
1877
local_queue=local_queue,
1878
+ labels=labels,
1838
1879
)</code></pre>
1839
1880
</details>
1840
1881
</dd>
@@ -1870,6 +1911,7 @@ <h3>Methods</h3>
1870
1911
associated with the cluster.
1871
1912
"""
1872
1913
namespace = self.config.namespace
1914
+ self._throw_for_no_raycluster()
1873
1915
try:
1874
1916
config_check()
1875
1917
api_instance = client.CustomObjectsApi(api_config_handler())
@@ -1944,26 +1986,18 @@ <h3>Methods</h3>
1944
1986
namespace=rc["metadata"]["namespace"],
1945
1987
machine_types=machine_types,
1946
1988
num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"],
1947
- min_cpus=int(
1948
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1949
- "resources"
1950
- ]["requests"]["cpu"]
1951
- ),
1952
- max_cpus=int(
1953
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1954
- "resources"
1955
- ]["limits"]["cpu"]
1956
- ),
1957
- min_memory=int(
1958
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1959
- "resources"
1960
- ]["requests"]["memory"][:-1]
1961
- ),
1962
- max_memory=int(
1963
- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1964
- "resources"
1965
- ]["limits"]["memory"][:-1]
1966
- ),
1989
+ min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1990
+ "containers"
1991
+ ][0]["resources"]["requests"]["cpu"],
1992
+ max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1993
+ "containers"
1994
+ ][0]["resources"]["limits"]["cpu"],
1995
+ min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1996
+ "containers"
1997
+ ][0]["resources"]["requests"]["memory"],
1998
+ max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1999
+ "containers"
2000
+ ][0]["resources"]["limits"]["memory"],
1967
2001
num_gpus=int(
1968
2002
rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1969
2003
"resources"
@@ -2168,6 +2202,10 @@ <h3>Methods</h3>
2168
2202
Applies the AppWrapper yaml, pushing the resource request onto
2169
2203
the MCAD queue.
2170
2204
"""
2205
+
2206
+ # check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
2207
+ self._throw_for_no_raycluster()
2208
+
2171
2209
namespace = self.config.namespace
2172
2210
2173
2211
try:
0 commit comments