Skip to content

Commit 128a8e5

Browse files
authored
Torchx mcad coscheduler (#693)
* Enable PodGroups * Enable co-scheduler * Fix lint errors * Minor documentation fixes * Remove comment * Support different PodGroups for different Roles * Improved documentation and code readability --------- Co-authored-by: Sara Kokkila Schumacher <[email protected]>
1 parent 77c67eb commit 128a8e5

File tree

2 files changed

+161
-25
lines changed

2 files changed

+161
-25
lines changed

torchx/schedulers/kubernetes_mcad_scheduler.py

Lines changed: 84 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,9 @@
1313
Prerequisites
1414
==============
1515
16-
TorchX kubernetes scheduler depends on AppWrapper + MCAD.
17-
18-
Install MCAD
16+
TorchX Kubernetes_MCAD scheduler depends on AppWrapper + MCAD.
1917
18+
Install MCAD:
2019
See deploying Multi-Cluster-Application-Dispatcher guide
2120
https://github.com/project-codeflare/multi-cluster-app-dispatcher/blob/main/doc/deploy/deployment.md
2221
@@ -168,6 +167,7 @@ def role_to_pod(
168167
role: Role,
169168
service_account: Optional[str],
170169
image_secret: Optional[str],
170+
coscheduler_name: Optional[str],
171171
) -> "V1Pod":
172172
from kubernetes.client.models import ( # noqa: F811 redefinition of unused
173173
V1Container,
@@ -338,6 +338,7 @@ def role_to_pod(
338338
service_account_name=service_account,
339339
volumes=volumes,
340340
node_selector=node_selector,
341+
scheduler_name=coscheduler_name,
341342
),
342343
metadata=V1ObjectMeta(
343344
name=name,
@@ -352,6 +353,31 @@ def role_to_pod(
352353
)
353354

354355

356+
def create_pod_group(role: Role, namespace: str, app_id: str) -> "Dict[str, Any]":
357+
pod_group_name = app_id + "-" + cleanup_str(role.name) + "-pg"
358+
359+
pod_group: Dict[str, Any] = {
360+
"apiVersion": "scheduling.sigs.k8s.io/v1alpha1",
361+
"kind": "PodGroup",
362+
"metadata": {
363+
"name": pod_group_name,
364+
"namespace": namespace,
365+
"labels": {
366+
"appwrapper.mcad.ibm.com": app_id,
367+
},
368+
},
369+
"spec": {
370+
"minMember": role.num_replicas,
371+
},
372+
}
373+
374+
genericitem_pod_group: Dict[str, Any] = {
375+
"replicas": 1,
376+
"generictemplate": pod_group,
377+
}
378+
return genericitem_pod_group
379+
380+
355381
def mcad_svc(svc_name: str, namespace: str, service_port: str) -> "V1Service":
356382
from kubernetes.client.models import ( # noqa: F401, F811
357383
V1Container,
@@ -436,6 +462,7 @@ def app_to_resource(
436462
namespace: str,
437463
service_account: Optional[str],
438464
image_secret: Optional[str],
465+
coscheduler_name: Optional[str],
439466
priority: Optional[int] = None,
440467
) -> Dict[str, Any]:
441468
"""
@@ -448,6 +475,12 @@ def app_to_resource(
448475
genericitems = []
449476

450477
unique_app_id = cleanup_str(make_unique(app.name))
478+
479+
if coscheduler_name is not None:
480+
for role_idx, role in enumerate(app.roles):
481+
genericitem_pod_group = create_pod_group(role, namespace, unique_app_id)
482+
genericitems.append(genericitem_pod_group)
483+
451484
for role_idx, role in enumerate(app.roles):
452485
for replica_id in range(role.num_replicas):
453486
values = macros.Values(
@@ -473,8 +506,18 @@ def app_to_resource(
473506
replica_role,
474507
service_account,
475508
image_secret,
509+
coscheduler_name,
510+
)
511+
pod.metadata.labels.update(
512+
pod_labels(
513+
app=app,
514+
role_idx=role_idx,
515+
role=role,
516+
replica_id=replica_id,
517+
coscheduler_name=coscheduler_name,
518+
app_id=unique_app_id,
519+
)
476520
)
477-
pod.metadata.labels.update(pod_labels(app, role_idx, role, replica_id))
478521

479522
genericitem: Dict[str, Any] = {
480523
"replicas": 1,
@@ -676,6 +719,7 @@ class KubernetesMCADOpts(TypedDict, total=False):
676719
service_account: Optional[str]
677720
priority: Optional[int]
678721
image_secret: Optional[str]
722+
coscheduler_name: Optional[str]
679723

680724

681725
class KubernetesMCADScheduler(DockerWorkspaceMixin, Scheduler[KubernetesMCADOpts]):
@@ -699,6 +743,14 @@ class KubernetesMCADScheduler(DockerWorkspaceMixin, Scheduler[KubernetesMCADOpts
699743
$ torchx run --scheduler kubernetes_mcad --scheduler_args namespace=default,image_repo=<your_image_repo> utils.echo --image alpine:latest --msg hello
700744
...
701745
746+
The TorchX-MCAD scheduler can be used with a secondary scheduler on Kubernetes.
747+
To enable this, the user must provide the name of the coscheduler.
748+
With this feature, a PodGroup is defined for each TorchX role and the coscheduler
749+
handles secondary scheduling on the Kubernetes cluster. For additional resources, see:
750+
1. PodGroups and Coscheduling: https://github.com/kubernetes-sigs/scheduler-plugins/tree/release-1.24/pkg/coscheduling
751+
2. Installing Secondary schedulers: https://github.com/kubernetes-sigs/scheduler-plugins/blob/release-1.24/doc/install.md
752+
3. PodGroup CRD: https://github.com/kubernetes-sigs/scheduler-plugins/blob/release-1.24/config/crd/bases/scheduling.sigs.k8s.io_podgroups.yaml
753+
702754
**Config Options**
703755
704756
.. runopts::
@@ -861,9 +913,20 @@ def _submit_dryrun(
861913
namespace = cfg.get("namespace")
862914
assert isinstance(namespace, str), "namespace must be a str"
863915

916+
coscheduler_name = cfg.get("coscheduler_name")
917+
assert coscheduler_name is None or isinstance(
918+
coscheduler_name, str
919+
), "coscheduler_name must be a string"
920+
864921
resource = app_to_resource(
865-
app, namespace, service_account, image_secret, priority
922+
app=app,
923+
namespace=namespace,
924+
service_account=service_account,
925+
image_secret=image_secret,
926+
coscheduler_name=coscheduler_name,
927+
priority=priority,
866928
)
929+
867930
req = KubernetesMCADJob(
868931
resource=resource,
869932
images_to_push=images_to_push,
@@ -917,6 +980,11 @@ def run_opts(self) -> runopts:
917980
type_=str,
918981
help="The name of the Kubernetes/OpenShift secret set up for private images",
919982
)
983+
opts.add(
984+
"coscheduler_name",
985+
type_=str,
986+
help="Option to run TorchX-MCAD with a co-scheduler. User must provide the co-scheduler name.",
987+
)
920988
return opts
921989

922990
def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
@@ -1070,12 +1138,21 @@ def create_scheduler(session_name: str, **kwargs: Any) -> KubernetesMCADSchedule
10701138

10711139
# TODO update to Kubernetes standard labels (https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/)
10721140
def pod_labels(
1073-
app: AppDef, role_idx: int, role: Role, replica_id: int
1141+
app: AppDef,
1142+
role_idx: int,
1143+
role: Role,
1144+
replica_id: int,
1145+
coscheduler_name: Optional[str],
1146+
app_id: str,
10741147
) -> Dict[str, str]:
1075-
return {
1148+
labels = {
10761149
LABEL_VERSION: torchx.__version__,
10771150
LABEL_APP_NAME: app.name,
10781151
LABEL_ROLE_INDEX: str(role_idx),
10791152
LABEL_ROLE_NAME: role.name,
10801153
LABEL_REPLICA_ID: str(replica_id),
10811154
}
1155+
if coscheduler_name is not None:
1156+
pod_group = app_id + "-" + cleanup_str(role.name) + "-pg"
1157+
labels.update({"pod-group.scheduling.sigs.k8s.io": pod_group})
1158+
return labels

0 commit comments

Comments
 (0)