Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions deployments/kai-scheduler/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright 2025 NVIDIA CORPORATION
# SPDX-License-Identifier: Apache-2.0

{{/*
kai-scheduler.operator.podDisruptionBudgetConfig

Resolves the effective operator PodDisruptionBudget settings by overlaying
.Values.operator.podDisruptionBudget on the defaults enabled: true, maxUnavailable: 1.

Key presence is tested with hasKey instead of Sprig's default(), because default() treats
false and 0 as empty and would replace an explicit "enabled: false" or "maxUnavailable: 0"
with the fallback value.

maxUnavailable handling:
  - a string ending in "%" (for example "25%") is passed through unchanged, because the
    PodDisruptionBudget API accepts a percentage and int() would turn it into 0, which
    forbids every voluntary eviction;
  - any other value is coerced with int(), exactly as before.

Context: the chart root context (reads .Values.operator.podDisruptionBudget).
Returns: a YAML mapping with the keys "enabled" and "maxUnavailable"; callers parse it
with fromYaml.
*/}}
{{- define "kai-scheduler.operator.podDisruptionBudgetConfig" -}}
{{- $pdb := .Values.operator.podDisruptionBudget | default dict }}
{{- $pdbEnabled := true }}
{{- if hasKey $pdb "enabled" }}
{{- $pdbEnabled = $pdb.enabled }}
{{- end }}
{{- $maxUnavailable := 1 }}
{{- if hasKey $pdb "maxUnavailable" }}
{{- $maxUnavailable = int $pdb.maxUnavailable }}
{{- if kindIs "string" $pdb.maxUnavailable }}
{{- if hasSuffix "%" $pdb.maxUnavailable }}
{{- $maxUnavailable = $pdb.maxUnavailable }}
{{- end }}
{{- end }}
{{- end }}
{{- dict "enabled" $pdbEnabled "maxUnavailable" $maxUnavailable | toYaml }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2025 NVIDIA CORPORATION
# SPDX-License-Identifier: Apache-2.0

{{- /*
Operator PodDisruptionBudget.
Rendered only when the resolved PDB config has "enabled" set and
.Values.operator.replicaCount is greater than 1. The effective settings come from the
"kai-scheduler.operator.podDisruptionBudgetConfig" helper, whose YAML output is parsed
back into a map with fromYaml.
*/ -}}
{{- $cfg := fromYaml (include "kai-scheduler.operator.podDisruptionBudgetConfig" .) }}
{{- if and $cfg.enabled (gt (int .Values.operator.replicaCount) 1) }}
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: kai-operator-pdb
  namespace: {{ .Release.Namespace | quote }}
  labels:
    app: kai-operator
spec:
  maxUnavailable: {{ $cfg.maxUnavailable }}
  selector:
    matchLabels:
      app: kai-operator
{{- end }}
6 changes: 6 additions & 0 deletions deployments/kai-scheduler/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ operator:
probeBindAddress: ":8081"
qps: 50
burst: 300
# PodDisruptionBudget for the operator: limits voluntary evictions (e.g. node drains) so that not all
# operator pods are removed at once. It is rendered only when enabled is true and operator.replicaCount > 1.
# With a single replica no PDB is created, because a PDB covering one pod (e.g. minAvailable: 1) can block
# drains; maxUnavailable therefore has no effect when replicaCount is 1.
podDisruptionBudget:
enabled: true
maxUnavailable: 1

podgrouper:
enabled: true
Expand Down