Skip to content

Torchx mcad coscheduler #693

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 84 additions & 7 deletions torchx/schedulers/kubernetes_mcad_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@
Prerequisites
==============

TorchX kubernetes scheduler depends on AppWrapper + MCAD.

Install MCAD
TorchX Kubernetes_MCAD scheduler depends on AppWrapper + MCAD.

Install MCAD:
See deploying Multi-Cluster-Application-Dispatcher guide
https://github.com/project-codeflare/multi-cluster-app-dispatcher/blob/main/doc/deploy/deployment.md

Expand Down Expand Up @@ -168,6 +167,7 @@ def role_to_pod(
role: Role,
service_account: Optional[str],
image_secret: Optional[str],
coscheduler_name: Optional[str],
) -> "V1Pod":
from kubernetes.client.models import ( # noqa: F811 redefinition of unused
V1Container,
Expand Down Expand Up @@ -338,6 +338,7 @@ def role_to_pod(
service_account_name=service_account,
volumes=volumes,
node_selector=node_selector,
scheduler_name=coscheduler_name,
),
metadata=V1ObjectMeta(
name=name,
Expand All @@ -352,6 +353,31 @@ def role_to_pod(
)


def create_pod_group(role: Role, namespace: str, app_id: str) -> "Dict[str, Any]":
pod_group_name = app_id + "-" + cleanup_str(role.name) + "-pg"

pod_group: Dict[str, Any] = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Included the PodGroup CRD (release 1.24) in the updated documentation.

"apiVersion": "scheduling.sigs.k8s.io/v1alpha1",
"kind": "PodGroup",
"metadata": {
"name": pod_group_name,
"namespace": namespace,
"labels": {
"appwrapper.mcad.ibm.com": app_id,
},
},
"spec": {
"minMember": role.num_replicas,
},
}

genericitem_pod_group: Dict[str, Any] = {
"replicas": 1,
"generictemplate": pod_group,
}
return genericitem_pod_group


def mcad_svc(svc_name: str, namespace: str, service_port: str) -> "V1Service":
from kubernetes.client.models import ( # noqa: F401, F811
V1Container,
Expand Down Expand Up @@ -436,6 +462,7 @@ def app_to_resource(
namespace: str,
service_account: Optional[str],
image_secret: Optional[str],
coscheduler_name: Optional[str],
priority: Optional[int] = None,
) -> Dict[str, Any]:
"""
Expand All @@ -448,6 +475,12 @@ def app_to_resource(
genericitems = []

unique_app_id = cleanup_str(make_unique(app.name))

if coscheduler_name is not None:
for role_idx, role in enumerate(app.roles):
genericitem_pod_group = create_pod_group(role, namespace, unique_app_id)
genericitems.append(genericitem_pod_group)

for role_idx, role in enumerate(app.roles):
for replica_id in range(role.num_replicas):
values = macros.Values(
Expand All @@ -473,8 +506,18 @@ def app_to_resource(
replica_role,
service_account,
image_secret,
coscheduler_name,
)
pod.metadata.labels.update(
pod_labels(
app=app,
role_idx=role_idx,
role=role,
replica_id=replica_id,
coscheduler_name=coscheduler_name,
app_id=unique_app_id,
)
)
pod.metadata.labels.update(pod_labels(app, role_idx, role, replica_id))

genericitem: Dict[str, Any] = {
"replicas": 1,
Expand Down Expand Up @@ -676,6 +719,7 @@ class KubernetesMCADOpts(TypedDict, total=False):
service_account: Optional[str]
priority: Optional[int]
image_secret: Optional[str]
coscheduler_name: Optional[str]


class KubernetesMCADScheduler(DockerWorkspaceMixin, Scheduler[KubernetesMCADOpts]):
Expand All @@ -699,6 +743,14 @@ class KubernetesMCADScheduler(DockerWorkspaceMixin, Scheduler[KubernetesMCADOpts
$ torchx run --scheduler kubernetes_mcad --scheduler_args namespace=default,image_repo=<your_image_repo> utils.echo --image alpine:latest --msg hello
...

The TorchX-MCAD scheduler can be used with a secondary scheduler on Kubernetes.
To enable this, the user must provide the name of the coscheduler.
With this feature, a PodGroup is defined for each TorchX role and the coscheduler
handles secondary scheduling on the Kubernetes cluster. For additional resources, see:
1. PodGroups and Coscheduling: https://github.com/kubernetes-sigs/scheduler-plugins/tree/release-1.24/pkg/coscheduling
2. Installing Secondary schedulers: https://github.com/kubernetes-sigs/scheduler-plugins/blob/release-1.24/doc/install.md
3. PodGroup CRD: https://github.com/kubernetes-sigs/scheduler-plugins/blob/release-1.24/config/crd/bases/scheduling.sigs.k8s.io_podgroups.yaml

**Config Options**

.. runopts::
Expand Down Expand Up @@ -861,9 +913,20 @@ def _submit_dryrun(
namespace = cfg.get("namespace")
assert isinstance(namespace, str), "namespace must be a str"

coscheduler_name = cfg.get("coscheduler_name")
assert coscheduler_name is None or isinstance(
coscheduler_name, str
), "coscheduler_name must be a string"

resource = app_to_resource(
app, namespace, service_account, image_secret, priority
app=app,
namespace=namespace,
service_account=service_account,
image_secret=image_secret,
coscheduler_name=coscheduler_name,
priority=priority,
)

req = KubernetesMCADJob(
resource=resource,
images_to_push=images_to_push,
Expand Down Expand Up @@ -917,6 +980,11 @@ def run_opts(self) -> runopts:
type_=str,
help="The name of the Kubernetes/OpenShift secret set up for private images",
)
opts.add(
"coscheduler_name",
type_=str,
help="Option to run TorchX-MCAD with a co-scheduler. User must provide the co-scheduler name.",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add some links to any external documentation about coschedulers?

Like: https://github.com/kubernetes-sigs/scheduler-plugins/blob/master/pkg/coscheduling/README.md ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the documentation with brief explanation and references to secondary scheduling, coscheduling and PodGroups, and the PodGroup CRD.

)
return opts

def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
Expand Down Expand Up @@ -1070,12 +1138,21 @@ def create_scheduler(session_name: str, **kwargs: Any) -> KubernetesMCADSchedule

# TODO update to Kubernetes standard labels (https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/)
def pod_labels(
app: AppDef, role_idx: int, role: Role, replica_id: int
app: AppDef,
role_idx: int,
role: Role,
replica_id: int,
coscheduler_name: Optional[str],
app_id: str,
) -> Dict[str, str]:
return {
labels = {
LABEL_VERSION: torchx.__version__,
LABEL_APP_NAME: app.name,
LABEL_ROLE_INDEX: str(role_idx),
LABEL_ROLE_NAME: role.name,
LABEL_REPLICA_ID: str(replica_id),
}
if coscheduler_name is not None:
pod_group = app_id + "-" + cleanup_str(role.name) + "-pg"
labels.update({"pod-group.scheduling.sigs.k8s.io": pod_group})
return labels
Loading