Skip to content

Commit 1c786d3

Browse files
codeflare-machine-account and openshift-ci[bot]
authored and committed on Oct 11, 2023
Changes in docs for release: v0.9.0
1 parent 7f4710e commit 1c786d3

File tree

4 files changed

+263
-78
lines changed

4 files changed

+263
-78
lines changed
 

‎docs/cluster/cluster.html

+88-19
Original file line number | Diff line number | Diff line change
@@ -141,6 +141,9 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
141141

142142
name = self.config.name
143143
namespace = self.config.namespace
144+
head_cpus = self.config.head_cpus
145+
head_memory = self.config.head_memory
146+
head_gpus = self.config.head_gpus
144147
min_cpu = self.config.min_cpus
145148
max_cpu = self.config.max_cpus
146149
min_memory = self.config.min_memory
@@ -158,6 +161,9 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
158161
return generate_appwrapper(
159162
name=name,
160163
namespace=namespace,
164+
head_cpus=head_cpus,
165+
head_memory=head_memory,
166+
head_gpus=head_gpus,
161167
min_cpu=min_cpu,
162168
max_cpu=max_cpu,
163169
min_memory=min_memory,
@@ -290,7 +296,7 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
290296
else:
291297
return False
292298

293-
def wait_ready(self, timeout: Optional[int] = None):
299+
def wait_ready(self, timeout: Optional[int] = None, dashboard_check: bool = True):
294300
&#34;&#34;&#34;
295301
Waits for requested cluster to be ready, up to an optional timeout (s).
296302
Checks every five seconds.
@@ -300,19 +306,32 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
300306
dashboard_ready = False
301307
status = None
302308
time = 0
303-
while not ready or not dashboard_ready:
309+
while not ready:
304310
status, ready = self.status(print_to_console=False)
305-
dashboard_ready = self.is_dashboard_ready()
306311
if status == CodeFlareClusterStatus.UNKNOWN:
307312
print(
308313
&#34;WARNING: Current cluster status is unknown, have you run cluster.up yet?&#34;
309314
)
310-
if not ready or not dashboard_ready:
315+
if not ready:
316+
if timeout and time &gt;= timeout:
317+
raise TimeoutError(
318+
f&#34;wait() timed out after waiting {timeout}s for cluster to be ready&#34;
319+
)
320+
sleep(5)
321+
time += 5
322+
print(&#34;Requested cluster is up and running!&#34;)
323+
324+
while dashboard_check and not dashboard_ready:
325+
dashboard_ready = self.is_dashboard_ready()
326+
if not dashboard_ready:
311327
if timeout and time &gt;= timeout:
312-
raise TimeoutError(f&#34;wait() timed out after waiting {timeout}s&#34;)
328+
raise TimeoutError(
329+
f&#34;wait() timed out after waiting {timeout}s for dashboard to be ready&#34;
330+
)
313331
sleep(5)
314332
time += 5
315-
print(&#34;Requested cluster and dashboard are up and running!&#34;)
333+
if dashboard_ready:
334+
print(&#34;Dashboard is ready!&#34;)
316335

317336
def details(self, print_to_console: bool = True) -&gt; RayCluster:
318337
cluster = _copy_to_ray(self)
@@ -640,6 +659,15 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
640659
worker_gpu=0, # hard to detect currently how many gpus, can override it with what the user asked for
641660
namespace=rc[&#34;metadata&#34;][&#34;namespace&#34;],
642661
dashboard=ray_route,
662+
head_cpus=rc[&#34;spec&#34;][&#34;headGroupSpec&#34;][&#34;template&#34;][&#34;spec&#34;][&#34;containers&#34;][0][
663+
&#34;resources&#34;
664+
][&#34;limits&#34;][&#34;cpu&#34;],
665+
head_mem=rc[&#34;spec&#34;][&#34;headGroupSpec&#34;][&#34;template&#34;][&#34;spec&#34;][&#34;containers&#34;][0][
666+
&#34;resources&#34;
667+
][&#34;limits&#34;][&#34;memory&#34;],
668+
head_gpu=rc[&#34;spec&#34;][&#34;headGroupSpec&#34;][&#34;template&#34;][&#34;spec&#34;][&#34;containers&#34;][0][
669+
&#34;resources&#34;
670+
][&#34;limits&#34;][&#34;nvidia.com/gpu&#34;],
643671
)
644672

645673

@@ -670,6 +698,9 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
670698
worker_gpu=cluster.config.num_gpus,
671699
namespace=cluster.config.namespace,
672700
dashboard=cluster.cluster_dashboard_uri(),
701+
head_cpus=cluster.config.head_cpus,
702+
head_mem=cluster.config.head_memory,
703+
head_gpu=cluster.config.head_gpus,
673704
)
674705
if ray.status == CodeFlareClusterStatus.READY:
675706
ray.status = RayClusterStatus.READY
@@ -879,6 +910,9 @@ <h2 class="section-title" id="header-classes">Classes</h2>
879910

880911
name = self.config.name
881912
namespace = self.config.namespace
913+
head_cpus = self.config.head_cpus
914+
head_memory = self.config.head_memory
915+
head_gpus = self.config.head_gpus
882916
min_cpu = self.config.min_cpus
883917
max_cpu = self.config.max_cpus
884918
min_memory = self.config.min_memory
@@ -896,6 +930,9 @@ <h2 class="section-title" id="header-classes">Classes</h2>
896930
return generate_appwrapper(
897931
name=name,
898932
namespace=namespace,
933+
head_cpus=head_cpus,
934+
head_memory=head_memory,
935+
head_gpus=head_gpus,
899936
min_cpu=min_cpu,
900937
max_cpu=max_cpu,
901938
min_memory=min_memory,
@@ -1028,7 +1065,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
10281065
else:
10291066
return False
10301067

1031-
def wait_ready(self, timeout: Optional[int] = None):
1068+
def wait_ready(self, timeout: Optional[int] = None, dashboard_check: bool = True):
10321069
&#34;&#34;&#34;
10331070
Waits for requested cluster to be ready, up to an optional timeout (s).
10341071
Checks every five seconds.
@@ -1038,19 +1075,32 @@ <h2 class="section-title" id="header-classes">Classes</h2>
10381075
dashboard_ready = False
10391076
status = None
10401077
time = 0
1041-
while not ready or not dashboard_ready:
1078+
while not ready:
10421079
status, ready = self.status(print_to_console=False)
1043-
dashboard_ready = self.is_dashboard_ready()
10441080
if status == CodeFlareClusterStatus.UNKNOWN:
10451081
print(
10461082
&#34;WARNING: Current cluster status is unknown, have you run cluster.up yet?&#34;
10471083
)
1048-
if not ready or not dashboard_ready:
1084+
if not ready:
1085+
if timeout and time &gt;= timeout:
1086+
raise TimeoutError(
1087+
f&#34;wait() timed out after waiting {timeout}s for cluster to be ready&#34;
1088+
)
1089+
sleep(5)
1090+
time += 5
1091+
print(&#34;Requested cluster is up and running!&#34;)
1092+
1093+
while dashboard_check and not dashboard_ready:
1094+
dashboard_ready = self.is_dashboard_ready()
1095+
if not dashboard_ready:
10491096
if timeout and time &gt;= timeout:
1050-
raise TimeoutError(f&#34;wait() timed out after waiting {timeout}s&#34;)
1097+
raise TimeoutError(
1098+
f&#34;wait() timed out after waiting {timeout}s for dashboard to be ready&#34;
1099+
)
10511100
sleep(5)
10521101
time += 5
1053-
print(&#34;Requested cluster and dashboard are up and running!&#34;)
1102+
if dashboard_ready:
1103+
print(&#34;Dashboard is ready!&#34;)
10541104

10551105
def details(self, print_to_console: bool = True) -&gt; RayCluster:
10561106
cluster = _copy_to_ray(self)
@@ -1267,6 +1317,9 @@ <h3>Methods</h3>
12671317

12681318
name = self.config.name
12691319
namespace = self.config.namespace
1320+
head_cpus = self.config.head_cpus
1321+
head_memory = self.config.head_memory
1322+
head_gpus = self.config.head_gpus
12701323
min_cpu = self.config.min_cpus
12711324
max_cpu = self.config.max_cpus
12721325
min_memory = self.config.min_memory
@@ -1284,6 +1337,9 @@ <h3>Methods</h3>
12841337
return generate_appwrapper(
12851338
name=name,
12861339
namespace=namespace,
1340+
head_cpus=head_cpus,
1341+
head_memory=head_memory,
1342+
head_gpus=head_gpus,
12871343
min_cpu=min_cpu,
12881344
max_cpu=max_cpu,
12891345
min_memory=min_memory,
@@ -1653,7 +1709,7 @@ <h3>Methods</h3>
16531709
</details>
16541710
</dd>
16551711
<dt id="codeflare_sdk.cluster.cluster.Cluster.wait_ready"><code class="name flex">
1656-
<span>def <span class="ident">wait_ready</span></span>(<span>self, timeout: Optional[int] = None)</span>
1712+
<span>def <span class="ident">wait_ready</span></span>(<span>self, timeout: Optional[int] = None, dashboard_check: bool = True)</span>
16571713
</code></dt>
16581714
<dd>
16591715
<div class="desc"><p>Waits for requested cluster to be ready, up to an optional timeout (s).
@@ -1662,7 +1718,7 @@ <h3>Methods</h3>
16621718
<summary>
16631719
<span>Expand source code</span>
16641720
</summary>
1665-
<pre><code class="python">def wait_ready(self, timeout: Optional[int] = None):
1721+
<pre><code class="python">def wait_ready(self, timeout: Optional[int] = None, dashboard_check: bool = True):
16661722
&#34;&#34;&#34;
16671723
Waits for requested cluster to be ready, up to an optional timeout (s).
16681724
Checks every five seconds.
@@ -1672,19 +1728,32 @@ <h3>Methods</h3>
16721728
dashboard_ready = False
16731729
status = None
16741730
time = 0
1675-
while not ready or not dashboard_ready:
1731+
while not ready:
16761732
status, ready = self.status(print_to_console=False)
1677-
dashboard_ready = self.is_dashboard_ready()
16781733
if status == CodeFlareClusterStatus.UNKNOWN:
16791734
print(
16801735
&#34;WARNING: Current cluster status is unknown, have you run cluster.up yet?&#34;
16811736
)
1682-
if not ready or not dashboard_ready:
1737+
if not ready:
1738+
if timeout and time &gt;= timeout:
1739+
raise TimeoutError(
1740+
f&#34;wait() timed out after waiting {timeout}s for cluster to be ready&#34;
1741+
)
1742+
sleep(5)
1743+
time += 5
1744+
print(&#34;Requested cluster is up and running!&#34;)
1745+
1746+
while dashboard_check and not dashboard_ready:
1747+
dashboard_ready = self.is_dashboard_ready()
1748+
if not dashboard_ready:
16831749
if timeout and time &gt;= timeout:
1684-
raise TimeoutError(f&#34;wait() timed out after waiting {timeout}s&#34;)
1750+
raise TimeoutError(
1751+
f&#34;wait() timed out after waiting {timeout}s for dashboard to be ready&#34;
1752+
)
16851753
sleep(5)
16861754
time += 5
1687-
print(&#34;Requested cluster and dashboard are up and running!&#34;)</code></pre>
1755+
if dashboard_ready:
1756+
print(&#34;Dashboard is ready!&#34;)</code></pre>
16881757
</details>
16891758
</dd>
16901759
</dl>

‎docs/cluster/config.html

+24-3
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.config</code></h1>
6666
name: str
6767
namespace: str = None
6868
head_info: list = field(default_factory=list)
69+
head_cpus: int = 2
70+
head_memory: int = 8
71+
head_gpus: int = 0
6972
machine_types: list = field(default_factory=list) # [&#34;m4.xlarge&#34;, &#34;g4dn.xlarge&#34;]
7073
min_cpus: int = 1
7174
max_cpus: int = 1
@@ -76,7 +79,7 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.config</code></h1>
7679
template: str = f&#34;{dir}/templates/base-template.yaml&#34;
7780
instascale: bool = False
7881
envs: dict = field(default_factory=dict)
79-
image: str = &#34;quay.io/project-codeflare/ray:2.5.0-py38-cu116&#34;
82+
image: str = &#34;quay.io/project-codeflare/ray:latest-py39-cu118&#34;
8083
local_interactive: bool = False
8184
image_pull_secrets: list = field(default_factory=list)
8285
dispatch_priority: str = None</code></pre>
@@ -93,7 +96,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
9396
<dl>
9497
<dt id="codeflare_sdk.cluster.config.ClusterConfiguration"><code class="flex name class">
9598
<span>class <span class="ident">ClusterConfiguration</span></span>
96-
<span>(</span><span>name: str, namespace: str = None, head_info: list = &lt;factory&gt;, machine_types: list = &lt;factory&gt;, min_cpus: int = 1, max_cpus: int = 1, num_workers: int = 1, min_memory: int = 2, max_memory: int = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, envs: dict = &lt;factory&gt;, image: str = 'quay.io/project-codeflare/ray:2.5.0-py38-cu116', local_interactive: bool = False, image_pull_secrets: list = &lt;factory&gt;, dispatch_priority: str = None)</span>
99+
<span>(</span><span>name: str, namespace: str = None, head_info: list = &lt;factory&gt;, head_cpus: int = 2, head_memory: int = 8, head_gpus: int = 0, machine_types: list = &lt;factory&gt;, min_cpus: int = 1, max_cpus: int = 1, num_workers: int = 1, min_memory: int = 2, max_memory: int = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, envs: dict = &lt;factory&gt;, image: str = 'quay.io/project-codeflare/ray:latest-py39-cu118', local_interactive: bool = False, image_pull_secrets: list = &lt;factory&gt;, dispatch_priority: str = None)</span>
97100
</code></dt>
98101
<dd>
99102
<div class="desc"><p>This dataclass is used to specify resource requirements and other details, and
@@ -111,6 +114,9 @@ <h2 class="section-title" id="header-classes">Classes</h2>
111114
name: str
112115
namespace: str = None
113116
head_info: list = field(default_factory=list)
117+
head_cpus: int = 2
118+
head_memory: int = 8
119+
head_gpus: int = 0
114120
machine_types: list = field(default_factory=list) # [&#34;m4.xlarge&#34;, &#34;g4dn.xlarge&#34;]
115121
min_cpus: int = 1
116122
max_cpus: int = 1
@@ -121,7 +127,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
121127
template: str = f&#34;{dir}/templates/base-template.yaml&#34;
122128
instascale: bool = False
123129
envs: dict = field(default_factory=dict)
124-
image: str = &#34;quay.io/project-codeflare/ray:2.5.0-py38-cu116&#34;
130+
image: str = &#34;quay.io/project-codeflare/ray:latest-py39-cu118&#34;
125131
local_interactive: bool = False
126132
image_pull_secrets: list = field(default_factory=list)
127133
dispatch_priority: str = None</code></pre>
@@ -136,10 +142,22 @@ <h3>Class variables</h3>
136142
<dd>
137143
<div class="desc"></div>
138144
</dd>
145+
<dt id="codeflare_sdk.cluster.config.ClusterConfiguration.head_cpus"><code class="name">var <span class="ident">head_cpus</span> : int</code></dt>
146+
<dd>
147+
<div class="desc"></div>
148+
</dd>
149+
<dt id="codeflare_sdk.cluster.config.ClusterConfiguration.head_gpus"><code class="name">var <span class="ident">head_gpus</span> : int</code></dt>
150+
<dd>
151+
<div class="desc"></div>
152+
</dd>
139153
<dt id="codeflare_sdk.cluster.config.ClusterConfiguration.head_info"><code class="name">var <span class="ident">head_info</span> : list</code></dt>
140154
<dd>
141155
<div class="desc"></div>
142156
</dd>
157+
<dt id="codeflare_sdk.cluster.config.ClusterConfiguration.head_memory"><code class="name">var <span class="ident">head_memory</span> : int</code></dt>
158+
<dd>
159+
<div class="desc"></div>
160+
</dd>
143161
<dt id="codeflare_sdk.cluster.config.ClusterConfiguration.image"><code class="name">var <span class="ident">image</span> : str</code></dt>
144162
<dd>
145163
<div class="desc"></div>
@@ -219,7 +237,10 @@ <h4><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration" href="#co
219237
<ul class="two-column">
220238
<li><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration.dispatch_priority" href="#codeflare_sdk.cluster.config.ClusterConfiguration.dispatch_priority">dispatch_priority</a></code></li>
221239
<li><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration.envs" href="#codeflare_sdk.cluster.config.ClusterConfiguration.envs">envs</a></code></li>
240+
<li><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration.head_cpus" href="#codeflare_sdk.cluster.config.ClusterConfiguration.head_cpus">head_cpus</a></code></li>
241+
<li><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration.head_gpus" href="#codeflare_sdk.cluster.config.ClusterConfiguration.head_gpus">head_gpus</a></code></li>
222242
<li><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration.head_info" href="#codeflare_sdk.cluster.config.ClusterConfiguration.head_info">head_info</a></code></li>
243+
<li><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration.head_memory" href="#codeflare_sdk.cluster.config.ClusterConfiguration.head_memory">head_memory</a></code></li>
223244
<li><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration.image" href="#codeflare_sdk.cluster.config.ClusterConfiguration.image">image</a></code></li>
224245
<li><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration.image_pull_secrets" href="#codeflare_sdk.cluster.config.ClusterConfiguration.image_pull_secrets">image_pull_secrets</a></code></li>
225246
<li><code><a title="codeflare_sdk.cluster.config.ClusterConfiguration.instascale" href="#codeflare_sdk.cluster.config.ClusterConfiguration.instascale">instascale</a></code></li>

‎docs/cluster/model.html

+22-1
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.model</code></h1>
102102

103103
name: str
104104
status: RayClusterStatus
105+
head_cpus: int
106+
head_mem: str
107+
head_gpu: int
105108
workers: int
106109
worker_mem_min: str
107110
worker_mem_max: str
@@ -287,7 +290,7 @@ <h3>Class variables</h3>
287290
</dd>
288291
<dt id="codeflare_sdk.cluster.model.RayCluster"><code class="flex name class">
289292
<span>class <span class="ident">RayCluster</span></span>
290-
<span>(</span><span>name: str, status: <a title="codeflare_sdk.cluster.model.RayClusterStatus" href="#codeflare_sdk.cluster.model.RayClusterStatus">RayClusterStatus</a>, workers: int, worker_mem_min: str, worker_mem_max: str, worker_cpu: int, worker_gpu: int, namespace: str, dashboard: str)</span>
293+
<span>(</span><span>name: str, status: <a title="codeflare_sdk.cluster.model.RayClusterStatus" href="#codeflare_sdk.cluster.model.RayClusterStatus">RayClusterStatus</a>, head_cpus: int, head_mem: str, head_gpu: int, workers: int, worker_mem_min: str, worker_mem_max: str, worker_cpu: int, worker_gpu: int, namespace: str, dashboard: str)</span>
291294
</code></dt>
292295
<dd>
293296
<div class="desc"><p>For storing information about a Ray cluster.</p></div>
@@ -302,6 +305,9 @@ <h3>Class variables</h3>
302305

303306
name: str
304307
status: RayClusterStatus
308+
head_cpus: int
309+
head_mem: str
310+
head_gpu: int
305311
workers: int
306312
worker_mem_min: str
307313
worker_mem_max: str
@@ -316,6 +322,18 @@ <h3>Class variables</h3>
316322
<dd>
317323
<div class="desc"></div>
318324
</dd>
325+
<dt id="codeflare_sdk.cluster.model.RayCluster.head_cpus"><code class="name">var <span class="ident">head_cpus</span> : int</code></dt>
326+
<dd>
327+
<div class="desc"></div>
328+
</dd>
329+
<dt id="codeflare_sdk.cluster.model.RayCluster.head_gpu"><code class="name">var <span class="ident">head_gpu</span> : int</code></dt>
330+
<dd>
331+
<div class="desc"></div>
332+
</dd>
333+
<dt id="codeflare_sdk.cluster.model.RayCluster.head_mem"><code class="name">var <span class="ident">head_mem</span> : str</code></dt>
334+
<dd>
335+
<div class="desc"></div>
336+
</dd>
319337
<dt id="codeflare_sdk.cluster.model.RayCluster.name"><code class="name">var <span class="ident">name</span> : str</code></dt>
320338
<dd>
321339
<div class="desc"></div>
@@ -447,6 +465,9 @@ <h4><code><a title="codeflare_sdk.cluster.model.CodeFlareClusterStatus" href="#c
447465
<h4><code><a title="codeflare_sdk.cluster.model.RayCluster" href="#codeflare_sdk.cluster.model.RayCluster">RayCluster</a></code></h4>
448466
<ul class="two-column">
449467
<li><code><a title="codeflare_sdk.cluster.model.RayCluster.dashboard" href="#codeflare_sdk.cluster.model.RayCluster.dashboard">dashboard</a></code></li>
468+
<li><code><a title="codeflare_sdk.cluster.model.RayCluster.head_cpus" href="#codeflare_sdk.cluster.model.RayCluster.head_cpus">head_cpus</a></code></li>
469+
<li><code><a title="codeflare_sdk.cluster.model.RayCluster.head_gpu" href="#codeflare_sdk.cluster.model.RayCluster.head_gpu">head_gpu</a></code></li>
470+
<li><code><a title="codeflare_sdk.cluster.model.RayCluster.head_mem" href="#codeflare_sdk.cluster.model.RayCluster.head_mem">head_mem</a></code></li>
450471
<li><code><a title="codeflare_sdk.cluster.model.RayCluster.name" href="#codeflare_sdk.cluster.model.RayCluster.name">name</a></code></li>
451472
<li><code><a title="codeflare_sdk.cluster.model.RayCluster.namespace" href="#codeflare_sdk.cluster.model.RayCluster.namespace">namespace</a></code></li>
452473
<li><code><a title="codeflare_sdk.cluster.model.RayCluster.status" href="#codeflare_sdk.cluster.model.RayCluster.status">status</a></code></li>

‎docs/utils/generate_yaml.html

+129-55
Original file line numberDiff line numberDiff line change
@@ -138,35 +138,51 @@ <h1 class="title">Module <code>codeflare_sdk.utils.generate_yaml</code></h1>
138138

139139

140140
def update_custompodresources(
141-
item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers
141+
item,
142+
min_cpu,
143+
max_cpu,
144+
min_memory,
145+
max_memory,
146+
gpu,
147+
workers,
148+
head_cpus,
149+
head_memory,
150+
head_gpus,
142151
):
143152
if &#34;custompodresources&#34; in item.keys():
144153
custompodresources = item.get(&#34;custompodresources&#34;)
145154
for i in range(len(custompodresources)):
155+
resource = custompodresources[i]
146156
if i == 0:
147157
# Leave head node resources as template default
148-
continue
149-
resource = custompodresources[i]
150-
for k, v in resource.items():
151-
if k == &#34;replicas&#34; and i == 1:
152-
resource[k] = workers
153-
if k == &#34;requests&#34; or k == &#34;limits&#34;:
154-
for spec, _ in v.items():
155-
if spec == &#34;cpu&#34;:
156-
if k == &#34;limits&#34;:
157-
resource[k][spec] = max_cpu
158-
else:
159-
resource[k][spec] = min_cpu
160-
if spec == &#34;memory&#34;:
161-
if k == &#34;limits&#34;:
162-
resource[k][spec] = str(max_memory) + &#34;G&#34;
163-
else:
164-
resource[k][spec] = str(min_memory) + &#34;G&#34;
165-
if spec == &#34;nvidia.com/gpu&#34;:
166-
if i == 0:
167-
resource[k][spec] = 0
168-
else:
169-
resource[k][spec] = gpu
158+
resource[&#34;requests&#34;][&#34;cpu&#34;] = head_cpus
159+
resource[&#34;limits&#34;][&#34;cpu&#34;] = head_cpus
160+
resource[&#34;requests&#34;][&#34;memory&#34;] = str(head_memory) + &#34;G&#34;
161+
resource[&#34;limits&#34;][&#34;memory&#34;] = str(head_memory) + &#34;G&#34;
162+
resource[&#34;requests&#34;][&#34;nvidia.com/gpu&#34;] = head_gpus
163+
resource[&#34;limits&#34;][&#34;nvidia.com/gpu&#34;] = head_gpus
164+
165+
else:
166+
for k, v in resource.items():
167+
if k == &#34;replicas&#34; and i == 1:
168+
resource[k] = workers
169+
if k == &#34;requests&#34; or k == &#34;limits&#34;:
170+
for spec, _ in v.items():
171+
if spec == &#34;cpu&#34;:
172+
if k == &#34;limits&#34;:
173+
resource[k][spec] = max_cpu
174+
else:
175+
resource[k][spec] = min_cpu
176+
if spec == &#34;memory&#34;:
177+
if k == &#34;limits&#34;:
178+
resource[k][spec] = str(max_memory) + &#34;G&#34;
179+
else:
180+
resource[k][spec] = str(min_memory) + &#34;G&#34;
181+
if spec == &#34;nvidia.com/gpu&#34;:
182+
if i == 0:
183+
resource[k][spec] = 0
184+
else:
185+
resource[k][spec] = gpu
170186
else:
171187
sys.exit(&#34;Error: malformed template&#34;)
172188

@@ -236,11 +252,15 @@ <h1 class="title">Module <code>codeflare_sdk.utils.generate_yaml</code></h1>
236252
instascale,
237253
env,
238254
image_pull_secrets,
255+
head_cpus,
256+
head_memory,
257+
head_gpus,
239258
):
240259
if &#34;generictemplate&#34; in item.keys():
241260
head = item.get(&#34;generictemplate&#34;).get(&#34;spec&#34;).get(&#34;headGroupSpec&#34;)
242-
worker = item.get(&#34;generictemplate&#34;).get(&#34;spec&#34;).get(&#34;workerGroupSpecs&#34;)[0]
261+
head[&#34;rayStartParams&#34;][&#34;num-gpus&#34;] = str(int(head_gpus))
243262

263+
worker = item.get(&#34;generictemplate&#34;).get(&#34;spec&#34;).get(&#34;workerGroupSpecs&#34;)[0]
244264
# Head counts as first worker
245265
worker[&#34;replicas&#34;] = workers
246266
worker[&#34;minReplicas&#34;] = workers
@@ -256,7 +276,9 @@ <h1 class="title">Module <code>codeflare_sdk.utils.generate_yaml</code></h1>
256276
update_env(spec, env)
257277
if comp == head:
258278
# TODO: Eventually add head node configuration outside of template
259-
continue
279+
update_resources(
280+
spec, head_cpus, head_cpus, head_memory, head_memory, head_gpus
281+
)
260282
else:
261283
update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu)
262284

@@ -381,6 +403,9 @@ <h1 class="title">Module <code>codeflare_sdk.utils.generate_yaml</code></h1>
381403
def generate_appwrapper(
382404
name: str,
383405
namespace: str,
406+
head_cpus: int,
407+
head_memory: int,
408+
head_gpus: int,
384409
min_cpu: int,
385410
max_cpu: int,
386411
min_memory: int,
@@ -406,7 +431,16 @@ <h1 class="title">Module <code>codeflare_sdk.utils.generate_yaml</code></h1>
406431
update_labels(user_yaml, instascale, instance_types)
407432
update_priority(user_yaml, item, dispatch_priority, priority_val)
408433
update_custompodresources(
409-
item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers
434+
item,
435+
min_cpu,
436+
max_cpu,
437+
min_memory,
438+
max_memory,
439+
gpu,
440+
workers,
441+
head_cpus,
442+
head_memory,
443+
head_gpus,
410444
)
411445
update_nodes(
412446
item,
@@ -421,6 +455,9 @@ <h1 class="title">Module <code>codeflare_sdk.utils.generate_yaml</code></h1>
421455
instascale,
422456
env,
423457
image_pull_secrets,
458+
head_cpus,
459+
head_memory,
460+
head_gpus,
424461
)
425462
update_dashboard_route(route_item, cluster_name, namespace)
426463
if local_interactive:
@@ -577,7 +614,7 @@ <h2 class="section-title" id="header-functions">Functions</h2>
577614
</details>
578615
</dd>
579616
<dt id="codeflare_sdk.utils.generate_yaml.generate_appwrapper"><code class="name flex">
580-
<span>def <span class="ident">generate_appwrapper</span></span>(<span>name: str, namespace: str, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, instance_types: list, env, local_interactive: bool, image_pull_secrets: list, dispatch_priority: str, priority_val: int)</span>
617+
<span>def <span class="ident">generate_appwrapper</span></span>(<span>name: str, namespace: str, head_cpus: int, head_memory: int, head_gpus: int, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, instance_types: list, env, local_interactive: bool, image_pull_secrets: list, dispatch_priority: str, priority_val: int)</span>
581618
</code></dt>
582619
<dd>
583620
<div class="desc"></div>
@@ -588,6 +625,9 @@ <h2 class="section-title" id="header-functions">Functions</h2>
588625
<pre><code class="python">def generate_appwrapper(
589626
name: str,
590627
namespace: str,
628+
head_cpus: int,
629+
head_memory: int,
630+
head_gpus: int,
591631
min_cpu: int,
592632
max_cpu: int,
593633
min_memory: int,
@@ -613,7 +653,16 @@ <h2 class="section-title" id="header-functions">Functions</h2>
613653
update_labels(user_yaml, instascale, instance_types)
614654
update_priority(user_yaml, item, dispatch_priority, priority_val)
615655
update_custompodresources(
616-
item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers
656+
item,
657+
min_cpu,
658+
max_cpu,
659+
min_memory,
660+
max_memory,
661+
gpu,
662+
workers,
663+
head_cpus,
664+
head_memory,
665+
head_gpus,
617666
)
618667
update_nodes(
619668
item,
@@ -628,6 +677,9 @@ <h2 class="section-title" id="header-functions">Functions</h2>
628677
instascale,
629678
env,
630679
image_pull_secrets,
680+
head_cpus,
681+
head_memory,
682+
head_gpus,
631683
)
632684
update_dashboard_route(route_item, cluster_name, namespace)
633685
if local_interactive:
@@ -700,7 +752,7 @@ <h2 class="section-title" id="header-functions">Functions</h2>
700752
</details>
701753
</dd>
702754
<dt id="codeflare_sdk.utils.generate_yaml.update_custompodresources"><code class="name flex">
703-
<span>def <span class="ident">update_custompodresources</span></span>(<span>item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers)</span>
755+
<span>def <span class="ident">update_custompodresources</span></span>(<span>item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, head_cpus, head_memory, head_gpus)</span>
704756
</code></dt>
705757
<dd>
706758
<div class="desc"></div>
@@ -709,35 +761,51 @@ <h2 class="section-title" id="header-functions">Functions</h2>
709761
<span>Expand source code</span>
710762
</summary>
711763
<pre><code class="python">def update_custompodresources(
712-
item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers
764+
item,
765+
min_cpu,
766+
max_cpu,
767+
min_memory,
768+
max_memory,
769+
gpu,
770+
workers,
771+
head_cpus,
772+
head_memory,
773+
head_gpus,
713774
):
714775
if &#34;custompodresources&#34; in item.keys():
715776
custompodresources = item.get(&#34;custompodresources&#34;)
716777
for i in range(len(custompodresources)):
778+
resource = custompodresources[i]
717779
if i == 0:
718780
# Leave head node resources as template default
719-
continue
720-
resource = custompodresources[i]
721-
for k, v in resource.items():
722-
if k == &#34;replicas&#34; and i == 1:
723-
resource[k] = workers
724-
if k == &#34;requests&#34; or k == &#34;limits&#34;:
725-
for spec, _ in v.items():
726-
if spec == &#34;cpu&#34;:
727-
if k == &#34;limits&#34;:
728-
resource[k][spec] = max_cpu
729-
else:
730-
resource[k][spec] = min_cpu
731-
if spec == &#34;memory&#34;:
732-
if k == &#34;limits&#34;:
733-
resource[k][spec] = str(max_memory) + &#34;G&#34;
734-
else:
735-
resource[k][spec] = str(min_memory) + &#34;G&#34;
736-
if spec == &#34;nvidia.com/gpu&#34;:
737-
if i == 0:
738-
resource[k][spec] = 0
739-
else:
740-
resource[k][spec] = gpu
781+
resource[&#34;requests&#34;][&#34;cpu&#34;] = head_cpus
782+
resource[&#34;limits&#34;][&#34;cpu&#34;] = head_cpus
783+
resource[&#34;requests&#34;][&#34;memory&#34;] = str(head_memory) + &#34;G&#34;
784+
resource[&#34;limits&#34;][&#34;memory&#34;] = str(head_memory) + &#34;G&#34;
785+
resource[&#34;requests&#34;][&#34;nvidia.com/gpu&#34;] = head_gpus
786+
resource[&#34;limits&#34;][&#34;nvidia.com/gpu&#34;] = head_gpus
787+
788+
else:
789+
for k, v in resource.items():
790+
if k == &#34;replicas&#34; and i == 1:
791+
resource[k] = workers
792+
if k == &#34;requests&#34; or k == &#34;limits&#34;:
793+
for spec, _ in v.items():
794+
if spec == &#34;cpu&#34;:
795+
if k == &#34;limits&#34;:
796+
resource[k][spec] = max_cpu
797+
else:
798+
resource[k][spec] = min_cpu
799+
if spec == &#34;memory&#34;:
800+
if k == &#34;limits&#34;:
801+
resource[k][spec] = str(max_memory) + &#34;G&#34;
802+
else:
803+
resource[k][spec] = str(min_memory) + &#34;G&#34;
804+
if spec == &#34;nvidia.com/gpu&#34;:
805+
if i == 0:
806+
resource[k][spec] = 0
807+
else:
808+
resource[k][spec] = gpu
741809
else:
742810
sys.exit(&#34;Error: malformed template&#34;)</code></pre>
743811
</details>
@@ -855,7 +923,7 @@ <h2 class="section-title" id="header-functions">Functions</h2>
855923
</details>
856924
</dd>
857925
<dt id="codeflare_sdk.utils.generate_yaml.update_nodes"><code class="name flex">
858-
<span>def <span class="ident">update_nodes</span></span>(<span>item, appwrapper_name, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, image, instascale, env, image_pull_secrets)</span>
926+
<span>def <span class="ident">update_nodes</span></span>(<span>item, appwrapper_name, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, image, instascale, env, image_pull_secrets, head_cpus, head_memory, head_gpus)</span>
859927
</code></dt>
860928
<dd>
861929
<div class="desc"></div>
@@ -876,11 +944,15 @@ <h2 class="section-title" id="header-functions">Functions</h2>
876944
instascale,
877945
env,
878946
image_pull_secrets,
947+
head_cpus,
948+
head_memory,
949+
head_gpus,
879950
):
880951
if &#34;generictemplate&#34; in item.keys():
881952
head = item.get(&#34;generictemplate&#34;).get(&#34;spec&#34;).get(&#34;headGroupSpec&#34;)
882-
worker = item.get(&#34;generictemplate&#34;).get(&#34;spec&#34;).get(&#34;workerGroupSpecs&#34;)[0]
953+
head[&#34;rayStartParams&#34;][&#34;num-gpus&#34;] = str(int(head_gpus))
883954

955+
worker = item.get(&#34;generictemplate&#34;).get(&#34;spec&#34;).get(&#34;workerGroupSpecs&#34;)[0]
884956
# Head counts as first worker
885957
worker[&#34;replicas&#34;] = workers
886958
worker[&#34;minReplicas&#34;] = workers
@@ -896,7 +968,9 @@ <h2 class="section-title" id="header-functions">Functions</h2>
896968
update_env(spec, env)
897969
if comp == head:
898970
# TODO: Eventually add head node configuration outside of template
899-
continue
971+
update_resources(
972+
spec, head_cpus, head_cpus, head_memory, head_memory, head_gpus
973+
)
900974
else:
901975
update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu)</code></pre>
902976
</details>

0 commit comments

Comments
 (0)
Please sign in to comment.