Skip to content

Commit 0615507

Browse files
authored
Sakkara take 2 (#133)
* Revert "fix path to sakkara chart (#132)" This reverts commit eb2f43e. * Revert "Add sakkara-deploy as a submodule and configure chart publishing (#131)" This reverts commit 723b80f. * import of helm chart from sakkara-deploy.
1 parent eb2f43e commit 0615507

15 files changed

+561
-15
lines changed

.github/workflows/release-chart.yaml

+1-11
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ jobs:
1414
uses: actions/checkout@v4
1515
with:
1616
fetch-depth: 0
17-
submodules: true
1817

1918
- name: Configure Git
2019
run: |
@@ -24,7 +23,7 @@ jobs:
2423
- name: Install Helm
2524
uses: azure/setup-helm@v4
2625

27-
- name: Publish PyTorchJob Generator Helm Chart
26+
- name: Run chart-releaser
2827
uses: helm/[email protected]
2928
with:
3029
charts_dir: tools/pytorchjob-generator
@@ -33,15 +32,6 @@ jobs:
3332
env:
3433
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
3534

36-
- name: Publish Sakkara Scheduler Helm Chart
37-
uses: helm/[email protected]
38-
with:
39-
charts_dir: sakkara-deploy/install
40-
packages_with_index: true
41-
skip_existing: true
42-
env:
43-
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
44-
4535
publish:
4636
needs: release
4737
uses: project-codeflare/mlbatch/.github/workflows/gh-pages-static.yml@main

.gitmodules

-3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,3 @@
22
path = scheduler-plugins
33
url = https://github.com/kubernetes-sigs/scheduler-plugins.git
44
branch = release-1.28
5-
[submodule "sakkara-deploy"]
6-
path = sakkara-deploy
7-
url = git@github.com:atantawi/sakkara-deploy.git

sakkara-deploy

-1
This file was deleted.

tools/sakkara-deploy/README.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The helm/chart-releaser-action does not understand git submodules.
2+
3+
Therefore, we maintain a copy of https://github.com/atantawi/sakkara-deploy/tree/main/install/ here.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
## Release Instructions
2+
3+
1. Create a release prep branch
4+
5+
2. Update the version number in chart/Chart.yaml
6+
7+
3. Do a `helm unittest -u chart` and then run precommit to
8+
regenerate the helmdocs. Inspect the diff and make sure
9+
the only changes are the Chart version
10+
11+
4. Update the chart version number in the example
12+
of `helm search repo` in the main README.md
13+
14+
5. Submit & merge a PR with these changes
15+
16+
6. Manually trigger the `Release Charts` workflow in the Actions
17+
tab of the MLBatch GitHub project. This action will automatically
18+
generate and push tags for the newly released chart and trigger an
19+
update of the GH Pages (which contains the helm repo).
20+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
2+
# This supports shell glob matching, relative path matching, and
3+
# negation (prefixed with !). Only one pattern per line.
4+
.DS_Store
5+
# Common VCS dirs
6+
.git/
7+
.gitignore
8+
.bzr/
9+
.bzrignore
10+
.hg/
11+
.hgignore
12+
.svn/
13+
# Common backup files
14+
*.swp
15+
*.bak
16+
*.tmp
17+
*.orig
18+
*~
19+
# Various IDEs
20+
.project
21+
.idea/
22+
*.tmproj
23+
.vscode/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: v2
2+
appVersion: v0.29.7
3+
description: Deploy sakkara group and topology aware scheduler plugin in a cluster
4+
name: sakkara-scheduler
5+
type: application
6+
version: 0.0.1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# sakkara-scheduler
2+
3+
![Version: 0.0.1](https://img.shields.io/badge/Version-0.0.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.29.7](https://img.shields.io/badge/AppVersion-v0.29.7-informational?style=flat-square)
4+
5+
Deploy sakkara group and topology aware scheduler plugin in a cluster
6+
7+
## Values
8+
9+
| Key | Type | Default | Description |
10+
|-----|------|---------|-------------|
11+
| fullnameOverride | string | `""` | |
12+
| image.repository | string | `"quay.io"` | repository to fetch images from |
13+
| image.tag | string | `"v0.0.1"` | default is the chart appVersion |
14+
| nameOverride | string | `"sakkara"` | |
15+
| nodeSelector | object | `{}` | |
16+
| pluginConfig[0].args.topologyConfigMapNameSpace | string | `"sakkara-scheduler"` | |
17+
| pluginConfig[0].name | string | `"ClusterTopologyPlacementGroup"` | |
18+
| plugins.permit.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
19+
| plugins.postBind.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
20+
| plugins.postFilter.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
21+
| plugins.preEnqueue.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
22+
| plugins.preScore.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
23+
| plugins.queueSort.disabled[0].name | string | `"*"` | |
24+
| plugins.queueSort.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
25+
| plugins.reserve.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
26+
| plugins.score.disabled[0].name | string | `"*"` | |
27+
| plugins.score.enabled[0].name | string | `"ClusterTopologyPlacementGroup"` | |
28+
| plugins.score.enabled[0].weight | int | `10` | |
29+
| podAnnotations | object | `{}` | |
30+
| priorityClassName | string | `"system-node-critical"` | |
31+
| scheduler.affinity | object | `{}` | affinity for deployment's pods |
32+
| scheduler.enabled | bool | `true` | deploy second scheduler as deployment |
33+
| scheduler.image | string | `"ibm/sakkara-scheduler"` | path to scheduler image from repository |
34+
| scheduler.imagePullPolicy | string | `"IfNotPresent"` | |
35+
| scheduler.leaderElect | bool | `false` | enable for HA mode |
36+
| scheduler.replicaCount | int | `1` | increase for HA mode |
37+
| scheduler.resources | object | `{"limits":{"cpu":"500m","memory":"512Mi"},"requests":{"cpu":"200m","memory":"512Mi"}}` | requests/limits for scheduler deployment resources: {} |
38+
| scheduler.strategy.type | string | `"RollingUpdate"` | Deployment update strategy type |
39+
| scheduler.verbosity | int | `6` | Log level from 1 to 9 |
40+
| schedulerConfig.apiVersion | string | `"kubescheduler.config.k8s.io/v1"` | scheduler config apiversion (ref: https://kubernetes.io/docs/reference/scheduling/config/) |
41+
| securityContext.privileged | bool | `false` | |
42+
| tolerations | list | `[]` | |
43+
| useForKubeSchedulerUser | bool | `false` | allow User system:kube-scheduler to work with metrics and CRDs. primary usage is to replace default-scheduler with custom one |
44+
45+
----------------------------------------------
46+
Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
---
2+
apiVersion: apiextensions.k8s.io/v1
3+
kind: CustomResourceDefinition
4+
metadata:
5+
annotations:
6+
api-approved.kubernetes.io: https://github.com/kubernetes-sigs/scheduler-plugins/pull/50
7+
controller-gen.kubebuilder.io/version: v0.11.1
8+
creationTimestamp: null
9+
name: podgroups.scheduling.x-k8s.io
10+
spec:
11+
group: scheduling.x-k8s.io
12+
names:
13+
kind: PodGroup
14+
listKind: PodGroupList
15+
plural: podgroups
16+
shortNames:
17+
- pg
18+
- pgs
19+
singular: podgroup
20+
scope: Namespaced
21+
versions:
22+
- name: v1alpha1
23+
schema:
24+
openAPIV3Schema:
25+
description: PodGroup is a collection of Pod; used for batch workload.
26+
properties:
27+
apiVersion:
28+
description: 'APIVersion defines the versioned schema of this representation
29+
of an object. Servers should convert recognized schemas to the latest
30+
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
31+
type: string
32+
kind:
33+
description: 'Kind is a string value representing the REST resource this
34+
object represents. Servers may infer this from the endpoint the client
35+
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
36+
type: string
37+
metadata:
38+
type: object
39+
spec:
40+
description: Specification of the desired behavior of the pod group.
41+
properties:
42+
minMember:
43+
description: MinMember defines the minimal number of members/tasks
44+
to run the pod group; if there's not enough resources to start all
45+
tasks, the scheduler will not start anyone.
46+
format: int32
47+
type: integer
48+
minResources:
49+
additionalProperties:
50+
anyOf:
51+
- type: integer
52+
- type: string
53+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
54+
x-kubernetes-int-or-string: true
55+
description: MinResources defines the minimal resource of members/tasks
56+
to run the pod group; if there's not enough resources to start all
57+
tasks, the scheduler will not start anyone.
58+
type: object
59+
scheduleTimeoutSeconds:
60+
description: ScheduleTimeoutSeconds defines the maximal time of members/tasks
61+
to wait before run the pod group;
62+
format: int32
63+
type: integer
64+
type: object
65+
status:
66+
description: Status represents the current information about a pod group.
67+
This data may not be up to date.
68+
properties:
69+
failed:
70+
description: The number of pods which reached phase Failed.
71+
format: int32
72+
type: integer
73+
occupiedBy:
74+
description: OccupiedBy marks the workload (e.g., deployment, statefulset)
75+
UID that occupy the podgroup. It is empty if not initialized.
76+
type: string
77+
phase:
78+
description: Current phase of PodGroup.
79+
type: string
80+
running:
81+
description: The number of actively running pods.
82+
format: int32
83+
type: integer
84+
scheduleStartTime:
85+
description: ScheduleStartTime of the group
86+
format: date-time
87+
type: string
88+
succeeded:
89+
description: The number of pods which reached phase Succeeded.
90+
format: int32
91+
type: integer
92+
type: object
93+
type: object
94+
served: true
95+
storage: true
96+
subresources:
97+
status: {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{{/*
2+
Expand the name of the chart.
3+
*/}}
4+
{{- define "scheduler-plugins.name" -}}
5+
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6+
{{- end }}
7+
8+
{{/*
9+
Create a default fully qualified app name.
10+
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11+
If release name contains chart name it will be used as a full name.
12+
*/}}
13+
{{- define "scheduler-plugins.fullname" -}}
14+
{{- if .Values.fullnameOverride }}
15+
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16+
{{- else }}
17+
{{- $name := default .Chart.Name .Values.nameOverride }}
18+
{{- if contains $name .Release.Name }}
19+
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
20+
{{- else }}
21+
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22+
{{- end }}
23+
{{- end }}
24+
{{- end }}
25+
26+
{{/*
27+
Create chart name and version as used by the chart label.
28+
*/}}
29+
{{- define "scheduler-plugins.chart" -}}
30+
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31+
{{- end }}
32+
33+
{{/*
34+
Common labels
35+
*/}}
36+
{{- define "scheduler-plugins.labels" -}}
37+
helm.sh/chart: {{ include "scheduler-plugins.chart" . }}
38+
{{ include "scheduler-plugins.selectorLabels" . }}
39+
{{- if .Chart.AppVersion }}
40+
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41+
{{- end }}
42+
app.kubernetes.io/managed-by: {{ .Release.Service }}
43+
{{- end }}
44+
45+
{{/*
46+
Selector labels
47+
*/}}
48+
{{- define "scheduler-plugins.selectorLabels" -}}
49+
app.kubernetes.io/name: {{ include "scheduler-plugins.name" . }}
50+
app.kubernetes.io/instance: {{ .Release.Name }}
51+
{{- end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
apiVersion: v1
2+
kind: ConfigMap
3+
metadata:
4+
name: {{ include "scheduler-plugins.fullname" . }}
5+
namespace: {{ .Release.Namespace }}
6+
labels:
7+
{{- include "scheduler-plugins.labels" . | nindent 4 }}
8+
data:
9+
scheduler-config.yaml: |
10+
apiVersion: {{ .Values.schedulerConfig.apiVersion }}
11+
kind: KubeSchedulerConfiguration
12+
leaderElection:
13+
leaderElect: {{ .Values.scheduler.leaderElect }}
14+
resourceName: {{ include "scheduler-plugins.fullname" . }}
15+
profiles:
16+
# Compose all plugins in one profile
17+
- schedulerName: {{ include "scheduler-plugins.fullname" . }}
18+
plugins:
19+
{{- toYaml $.Values.plugins | nindent 8 }}
20+
{{- if $.Values.pluginConfig }}
21+
pluginConfig: {{ toYaml $.Values.pluginConfig | nindent 6 }}
22+
{{- end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
{{- if .Values.scheduler.enabled }}
2+
---
3+
apiVersion: apps/v1
4+
kind: Deployment
5+
metadata:
6+
name: {{ include "scheduler-plugins.fullname" . }}
7+
namespace: {{ .Release.Namespace }}
8+
labels:
9+
{{- include "scheduler-plugins.labels" . | nindent 4 }}
10+
component: scheduler
11+
spec:
12+
replicas: {{ .Values.scheduler.replicaCount }}
13+
{{- with .Values.scheduler.strategy }}
14+
strategy:
15+
{{- toYaml . | nindent 4 }}
16+
{{- end }}
17+
selector:
18+
matchLabels:
19+
{{- include "scheduler-plugins.selectorLabels" . | nindent 6 }}
20+
component: scheduler
21+
template:
22+
metadata:
23+
annotations:
24+
checksum/configmap: '{{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}'
25+
{{- with .Values.podAnnotations }}
26+
{{- toYaml . | nindent 8 }}
27+
{{- end }}
28+
labels:
29+
{{- include "scheduler-plugins.selectorLabels" . | nindent 8 }}
30+
component: scheduler
31+
spec:
32+
priorityClassName: {{ .Values.priorityClassName }}
33+
serviceAccountName: {{ include "scheduler-plugins.fullname" . }}
34+
containers:
35+
- command:
36+
- /bin/kube-scheduler
37+
- --config=/etc/kubernetes/scheduler-config.yaml
38+
- --v={{ .Values.scheduler.verbosity }}
39+
name: scheduler
40+
image: "{{ .Values.image.repository }}/{{ .Values.scheduler.image }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
41+
imagePullPolicy: {{ .Values.scheduler.imagePullPolicy }}
42+
resources:
43+
{{- toYaml .Values.scheduler.resources | nindent 12 }}
44+
securityContext:
45+
{{- toYaml .Values.securityContext | nindent 12 }}
46+
volumeMounts:
47+
- name: scheduler-config
48+
mountPath: /etc/kubernetes
49+
readOnly: true
50+
{{- with .Values.nodeSelector }}
51+
nodeSelector:
52+
{{- toYaml . | nindent 8 }}
53+
{{- end }}
54+
{{- with .Values.scheduler.affinity }}
55+
affinity:
56+
{{- toYaml . | nindent 8 }}
57+
{{- end }}
58+
{{- with .Values.tolerations }}
59+
tolerations:
60+
{{- toYaml . | nindent 8 }}
61+
{{- end }}
62+
volumes:
63+
- name: scheduler-config
64+
configMap:
65+
name: {{ include "scheduler-plugins.fullname" . }}
66+
{{- end }}

0 commit comments

Comments
 (0)