Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,8 @@ __pycache__/
*.tgz
.claude/

# Extracted subchart artifacts from helm dep update
/kube-prometheus-stack/
/kube-state-metrics/
/metrics-server/

195 changes: 195 additions & 0 deletions charts/dataplane/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,201 @@ nodeName: {{- toYaml . }}
{{- end }}
{{- end -}}

{{/*
Prometheus scheduling helpers
*/}}
{{/*
Renders a `topologySpreadConstraints:` block from
.Values.prometheus.topologySpreadConstraints. Renders nothing when the value
is empty. The output begins with a newline so it can be appended after other
scheduling fragments and re-indented by the caller.
*/}}
{{- define "prometheus.scheduling.topologySpreadConstraints" -}}
{{- with .Values.prometheus.topologySpreadConstraints }}
topologySpreadConstraints:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Renders an `affinity:` block from .Values.prometheus.affinity. Renders nothing
when the value is empty. The output begins with a newline so it can be
appended after other scheduling fragments and re-indented by the caller.
*/}}
{{- define "prometheus.scheduling.affinity" -}}
{{- with .Values.prometheus.affinity }}
affinity:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Renders a `nodeSelector:` block from .Values.prometheus.nodeSelector. Renders
nothing when the value is empty. The output begins with a newline so it can be
appended after other scheduling fragments and re-indented by the caller.
*/}}
{{- define "prometheus.scheduling.nodeSelector" -}}
{{- with .Values.prometheus.nodeSelector }}
nodeSelector:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Renders a single `nodeName: <value>` line from .Values.prometheus.nodeName.
Renders nothing when the value is empty. The output begins with a newline.
*/}}
{{- define "prometheus.scheduling.nodeName" -}}
{{- $pinnedNode := .Values.prometheus.nodeName -}}
{{- if $pinnedNode }}
nodeName: {{ toYaml $pinnedNode }}
{{- end }}
{{- end }}

{{/*
Renders a `tolerations:` block from .Values.prometheus.tolerations. Renders
nothing when the value is empty. The output begins with a newline so it can be
appended after other scheduling fragments and re-indented by the caller.
*/}}
{{- define "prometheus.scheduling.tolerations" -}}
{{- with .Values.prometheus.tolerations }}
tolerations:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Emits the pod scheduling fields for Prometheus, in this order:
topologySpreadConstraints, affinity, nodeSelector, nodeName, tolerations.
For each field, the "prometheus.scheduling.<field>" helper is included when
.Values.prometheus.<field> is set; otherwise "global.scheduling.<field>" is
included instead. Must be called with the chart root context.
*/}}
{{- define "prometheus.scheduling" -}}
{{- $ctx := . -}}
{{- range $field := list "topologySpreadConstraints" "affinity" "nodeSelector" "nodeName" "tolerations" -}}
{{- /* Default to the global helper; switch only when a component override exists. */ -}}
{{- $owner := "global" -}}
{{- if index $ctx.Values.prometheus $field -}}
{{- $owner = "prometheus" -}}
{{- end -}}
{{- include (printf "%s.scheduling.%s" $owner $field) $ctx -}}
{{- end -}}
{{- end -}}

{{/*
Flyteconnector scheduling helpers
*/}}
{{/*
Renders a `topologySpreadConstraints:` block from
.Values.flyteconnector.topologySpreadConstraints. Renders nothing when the
value is empty. The output begins with a newline so it can be appended after
other scheduling fragments and re-indented by the caller.
*/}}
{{- define "flyteconnector.scheduling.topologySpreadConstraints" -}}
{{- with .Values.flyteconnector.topologySpreadConstraints }}
topologySpreadConstraints:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Renders an `affinity:` block from .Values.flyteconnector.affinity. Renders
nothing when the value is empty. The value is passed through `tpl`, so
template expressions embedded in it are evaluated, as the flyteconnector
deployment did before it delegated to this helper. Must be included with the
chart root context, because `$` is handed to `tpl`. The output begins with a
newline.
*/}}
{{- define "flyteconnector.scheduling.affinity" -}}
{{- with .Values.flyteconnector.affinity }}
affinity:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- tpl (toYaml .) $ | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Renders a `nodeSelector:` block from .Values.flyteconnector.nodeSelector.
Renders nothing when the value is empty. The value is passed through `tpl`, so
template expressions embedded in it are evaluated, as the flyteconnector
deployment did before it delegated to this helper. Must be included with the
chart root context, because `$` is handed to `tpl`. The output begins with a
newline.
*/}}
{{- define "flyteconnector.scheduling.nodeSelector" -}}
{{- with .Values.flyteconnector.nodeSelector }}
nodeSelector:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- tpl (toYaml .) $ | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Renders a single `nodeName: <value>` line from .Values.flyteconnector.nodeName.
Renders nothing when the value is empty. The output begins with a newline.
*/}}
{{- define "flyteconnector.scheduling.nodeName" -}}
{{- $pinnedNode := .Values.flyteconnector.nodeName -}}
{{- if $pinnedNode }}
nodeName: {{ toYaml $pinnedNode }}
{{- end }}
{{- end }}

{{/*
Renders a `tolerations:` block from .Values.flyteconnector.tolerations.
Renders nothing when the value is empty. The value is passed through `tpl`, so
template expressions embedded in it are evaluated, as the flyteconnector
deployment did before it delegated to this helper. Must be included with the
chart root context, because `$` is handed to `tpl`. The output begins with a
newline.
*/}}
{{- define "flyteconnector.scheduling.tolerations" -}}
{{- with .Values.flyteconnector.tolerations }}
tolerations:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- tpl (toYaml .) $ | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Emits the pod scheduling fields for flyteconnector, in this order:
topologySpreadConstraints, affinity, nodeSelector, nodeName, tolerations.
For each field, the "flyteconnector.scheduling.<field>" helper is included
when .Values.flyteconnector.<field> is set; otherwise
"global.scheduling.<field>" is included instead. Must be called with the chart
root context.
*/}}
{{- define "flyteconnector.scheduling" -}}
{{- $ctx := . -}}
{{- range $field := list "topologySpreadConstraints" "affinity" "nodeSelector" "nodeName" "tolerations" -}}
{{- /* Default to the global helper; switch only when a component override exists. */ -}}
{{- $owner := "global" -}}
{{- if index $ctx.Values.flyteconnector $field -}}
{{- $owner = "flyteconnector" -}}
{{- end -}}
{{- include (printf "%s.scheduling.%s" $owner $field) $ctx -}}
{{- end -}}
{{- end -}}

{{/*
Imagebuilder buildkit scheduling helpers
*/}}
{{/*
Renders a `topologySpreadConstraints:` block from
.Values.imageBuilder.buildkit.topologySpreadConstraints. Renders nothing when
the value is empty. The output begins with a newline so it can be appended
after other scheduling fragments and re-indented by the caller.
*/}}
{{- define "imagebuilder.buildkit.scheduling.topologySpreadConstraints" -}}
{{- with .Values.imageBuilder.buildkit.topologySpreadConstraints }}
topologySpreadConstraints:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Renders an `affinity:` block from .Values.imageBuilder.buildkit.affinity.
Renders nothing when the value is empty. The output begins with a newline so
it can be appended after other scheduling fragments and re-indented by the
caller.
*/}}
{{- define "imagebuilder.buildkit.scheduling.affinity" -}}
{{- with .Values.imageBuilder.buildkit.affinity }}
affinity:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Renders a `nodeSelector:` block from .Values.imageBuilder.buildkit.nodeSelector.
Renders nothing when the value is empty. The output begins with a newline so
it can be appended after other scheduling fragments and re-indented by the
caller.
*/}}
{{- define "imagebuilder.buildkit.scheduling.nodeSelector" -}}
{{- with .Values.imageBuilder.buildkit.nodeSelector }}
nodeSelector:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Renders a single `nodeName: <value>` line from
.Values.imageBuilder.buildkit.nodeName. Renders nothing when the value is
empty. The output begins with a newline.
*/}}
{{- define "imagebuilder.buildkit.scheduling.nodeName" -}}
{{- $pinnedNode := .Values.imageBuilder.buildkit.nodeName -}}
{{- if $pinnedNode }}
nodeName: {{ toYaml $pinnedNode }}
{{- end }}
{{- end }}

{{/*
Renders a `tolerations:` block from .Values.imageBuilder.buildkit.tolerations.
Renders nothing when the value is empty. The output begins with a newline so
it can be appended after other scheduling fragments and re-indented by the
caller.
*/}}
{{- define "imagebuilder.buildkit.scheduling.tolerations" -}}
{{- with .Values.imageBuilder.buildkit.tolerations }}
tolerations:
  {{- /* nindent supplies the line break; trim ours to avoid an empty line. */}}
  {{- toYaml . | nindent 2 }}
{{- end }}
{{- end }}

{{/*
Emits the pod scheduling fields for the image builder's buildkit, in this
order: topologySpreadConstraints, affinity, nodeSelector, nodeName,
tolerations. For each field, the "imagebuilder.buildkit.scheduling.<field>"
helper is included when .Values.imageBuilder.buildkit.<field> is set;
otherwise "global.scheduling.<field>" is included instead. Must be called with
the chart root context.
*/}}
{{- define "imagebuilder.buildkit.scheduling" -}}
{{- $ctx := . -}}
{{- range $field := list "topologySpreadConstraints" "affinity" "nodeSelector" "nodeName" "tolerations" -}}
{{- /* Default to the global helper; switch only when a component override exists. */ -}}
{{- $owner := "global" -}}
{{- if index $ctx.Values.imageBuilder.buildkit $field -}}
{{- $owner = "imagebuilder.buildkit" -}}
{{- end -}}
{{- include (printf "%s.scheduling.%s" $owner $field) $ctx -}}
{{- end -}}
{{- end -}}

{{/*
Global service account annotations
*/}}
Expand Down
10 changes: 1 addition & 9 deletions charts/dataplane/templates/flyteconnector/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,5 @@ spec:
{{- with .Values.flyteconnector.additionalVolumes -}}
{{ tpl (toYaml .) $ | nindent 6 }}
{{- end }}
{{- with .Values.flyteconnector.nodeSelector }}
nodeSelector: {{ tpl (toYaml .) $ | nindent 8 }}
{{- end }}
{{- with .Values.flyteconnector.affinity }}
affinity: {{ tpl (toYaml .) $ | nindent 8 }}
{{- end }}
{{- with .Values.flyteconnector.tolerations }}
tolerations: {{ tpl (toYaml .) $ | nindent 8 }}
{{- end }}
{{- include "flyteconnector.scheduling" . | nindent 6 }}
{{- end }}
14 changes: 8 additions & 6 deletions charts/dataplane/templates/imagebuilder/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,10 @@ spec:
{{- with .Values.imageBuilder.buildkit.additionalVolumes -}}
{{ tpl (toYaml .) $ | nindent 6 }}
{{- end }}
{{- with .Values.imageBuilder.buildkit.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- if .Values.imageBuilder.buildkit.nodeSelector }}
{{- include "imagebuilder.buildkit.scheduling.nodeSelector" . | nindent 6 }}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm using the customer-facing resources to find the gaps.
I set this in values

scheduling:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: union.ai/node-role
            operator: In
            values:
            - services

But I see this is not picked up by this helper, which confirms what I found by testing.

{{- else if .Values.scheduling.nodeSelector }}
{{- include "global.scheduling.nodeSelector" . | nindent 6 }}
{{- end }}
affinity:
podAntiAffinity:
Expand All @@ -111,8 +112,9 @@ spec:
matchLabels:
{{- include "imagebuilder.buildkit.selectorLabels" . | nindent 16 }}
topologyKey: "kubernetes.io/hostname"
{{- with .Values.imageBuilder.buildkit.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- if .Values.imageBuilder.buildkit.tolerations }}
{{- include "imagebuilder.buildkit.scheduling.tolerations" . | nindent 6 }}
{{- else if .Values.scheduling.tolerations }}
{{- include "global.scheduling.tolerations" . | nindent 6 }}
{{- end }}
{{- end }}
13 changes: 1 addition & 12 deletions charts/dataplane/templates/prometheus/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,4 @@ spec:
- name: prometheus-config
configMap:
name: {{ include "union-operator.fullname" . }}-prometheus
{{- with .Values.prometheus.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.prometheus.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.prometheus.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- include "prometheus.scheduling" . | nindent 6 }}
34 changes: 33 additions & 1 deletion charts/dataplane/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,12 @@ opencost:
limits:
cpu: 1000m
memory: 4Gi
# -- Tolerations for opencost pods. Set to match scheduling.tolerations when using dedicated node pools.
tolerations: []
# -- Node selector for opencost pods. Set to match scheduling.nodeSelector when using dedicated node pools.
nodeSelector: {}
# -- Affinity rules for opencost pods.
affinity: {}

# -- Configuration for fluentbit used for the persistent logging feature.
# FluentBit runs as a DaemonSet and ships container logs to the persisted-logs/
Expand Down Expand Up @@ -1164,6 +1170,10 @@ image:

metrics-server:
enabled: false
# -- Tolerations for metrics-server pods. Set to match scheduling.tolerations when using dedicated node pools.
tolerations: []
# -- Node selector for metrics-server pods. Set to match scheduling.nodeSelector when using dedicated node pools.
nodeSelector: {}

# -- nodeobserver contains the configuration information for the node observer service.
nodeobserver:
Expand Down Expand Up @@ -1334,7 +1344,11 @@ prometheus:

# -- Standalone kube-state-metrics for Union features (cost tracking, pod resource metrics).
# Metric filtering is handled in the Prometheus static scrape config.
kube-state-metrics: {}
kube-state-metrics:
# -- Tolerations for kube-state-metrics pods. Set to match scheduling.tolerations when using dedicated node pools.
tolerations: []
# -- Node selector for kube-state-metrics pods. Set to match scheduling.nodeSelector when using dedicated node pools.
nodeSelector: {}

# -- Scopes the deployment, permissions and actions created into a single namespace
low_privilege: false
Expand Down Expand Up @@ -1704,6 +1718,10 @@ monitoring:

prometheusOperator:
enabled: true
# -- Tolerations for prometheus-operator pods. Set to match scheduling.tolerations when using dedicated node pools.
tolerations: []
# -- Node selector for prometheus-operator pods. Set to match scheduling.nodeSelector when using dedicated node pools.
nodeSelector: {}

# CRDs should be installed separately via the dataplane-crds chart
# (set crds.prometheusOperator: true) before enabling the monitoring stack.
Expand All @@ -1727,6 +1745,11 @@ monitoring:
# Should override for production deployments
adminPassword: admin

# -- Tolerations for grafana pods. Set to match scheduling.tolerations when using dedicated node pools.
tolerations: []
# -- Node selector for grafana pods. Set to match scheduling.nodeSelector when using dedicated node pools.
nodeSelector: {}

# Default monitoring stack for all relevant K8s components that impact
# Union performance and reliability.
coreDns:
Expand All @@ -1753,6 +1776,10 @@ monitoring:
kube-state-metrics:
nameOverride: "monitoring-kube-state-metrics"
fullnameOverride: "monitoring-kube-state-metrics"
# -- Tolerations for monitoring kube-state-metrics pods. Set to match scheduling.tolerations when using dedicated node pools.
tolerations: []
# -- Node selector for monitoring kube-state-metrics pods. Set to match scheduling.nodeSelector when using dedicated node pools.
nodeSelector: {}

# By default, install a separate Prometheus instance for monitoring.
# This is the simplest, out of the box model, it is highly recommended that users look
Expand Down Expand Up @@ -1782,3 +1809,8 @@ monitoring:
requests:
cpu: "500m"
memory: "1Gi"

# -- Tolerations for monitoring prometheus pods. Set to match scheduling.tolerations when using dedicated node pools.
tolerations: []
# -- Node selector for monitoring prometheus pods. Set to match scheduling.nodeSelector when using dedicated node pools.
nodeSelector: {}
Loading
Loading