diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 0b075640b22f..f9c91aa94592 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -520,13 +520,15 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr return false, nil } + forceScaleUp := a.processors.ScaleUpEnforcer.ShouldForceScaleUp(unschedulablePodsToHelp) + if len(unschedulablePodsToHelp) == 0 { scaleUpStatus.Result = status.ScaleUpNotNeeded klog.V(1).Info("No unschedulable pods") - } else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal { + } else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal && !forceScaleUp { scaleUpStatus.Result = status.ScaleUpNoOptionsAvailable klog.V(1).Infof("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes)) - } else if len(a.BypassedSchedulers) == 0 && allPodsAreNew(unschedulablePodsToHelp, currentTime) { + } else if len(a.BypassedSchedulers) == 0 && !forceScaleUp && allPodsAreNew(unschedulablePodsToHelp, currentTime) { // The assumption here is that these pods have been created very recently and probably there // is more pods to come. In theory we could check the newest pod time but then if pod were created // slowly but at the pace of 1 every 2 seconds then no scale up would be triggered for long time. 
diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 3395b26ac6e7..addd89abef7f 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -563,6 +563,8 @@ func buildAutoscaler(context ctx.Context, debuggingSnapshotter debuggingsnapshot opts.LoopStartNotifier = loopstart.NewObserversList([]loopstart.Observer{provreqProcesor}) podListProcessor.AddProcessor(provreqProcesor) + + opts.Processors.ScaleUpEnforcer = provreq.NewProvisioningRequestScaleUpEnforcer() } if *proactiveScaleupEnabled { diff --git a/cluster-autoscaler/processors/pods/scaleup_enforcer.go b/cluster-autoscaler/processors/pods/scaleup_enforcer.go new file mode 100644 index 000000000000..08ff78c3456a --- /dev/null +++ b/cluster-autoscaler/processors/pods/scaleup_enforcer.go @@ -0,0 +1,38 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pods + +import apiv1 "k8s.io/api/core/v1" + +// ScaleUpEnforcer can force scale up even if all pods are new or MaxNodesTotal was achieved. +type ScaleUpEnforcer interface { + ShouldForceScaleUp(unschedulablePods []*apiv1.Pod) bool +} + +// NoOpScaleUpEnforcer returns false by default in case of ProvisioningRequests disabled. +type NoOpScaleUpEnforcer struct { +} + +// NewDefaultScaleUpEnforcer creates an instance of ScaleUpEnforcer. +func NewDefaultScaleUpEnforcer() ScaleUpEnforcer { + return &NoOpScaleUpEnforcer{} +} + +// ShouldForceScaleUp returns false by default. 
+func (p *NoOpScaleUpEnforcer) ShouldForceScaleUp(unschedulablePods []*apiv1.Pod) bool { + return false +} diff --git a/cluster-autoscaler/processors/pods/scaleup_enforcer_test.go b/cluster-autoscaler/processors/pods/scaleup_enforcer_test.go new file mode 100644 index 000000000000..b33d21ab9c95 --- /dev/null +++ b/cluster-autoscaler/processors/pods/scaleup_enforcer_test.go @@ -0,0 +1,34 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pods + +import ( + "testing" + + apiv1 "k8s.io/api/core/v1" + testutils "k8s.io/autoscaler/cluster-autoscaler/utils/test" +) + +func TestDefaultScaleUpEnforcer(t *testing.T) { + p1 := testutils.BuildTestPod("p1", 40, 0) + unschedulablePods := []*apiv1.Pod{p1} + scaleUpEnforcer := NewDefaultScaleUpEnforcer() + forceScaleUp := scaleUpEnforcer.ShouldForceScaleUp(unschedulablePods) + if forceScaleUp { + t.Errorf("Error: scaleUpEnforcer should not force scale up by default") + } +} diff --git a/cluster-autoscaler/processors/processors.go b/cluster-autoscaler/processors/processors.go index cefdc36dcb22..b391fed789b2 100644 --- a/cluster-autoscaler/processors/processors.go +++ b/cluster-autoscaler/processors/processors.go @@ -74,6 +74,8 @@ type AutoscalingProcessors struct { ScaleStateNotifier *nodegroupchange.NodeGroupChangeObserversList // AsyncNodeGroupChecker checks if node group is upcoming or not AsyncNodeGroupStateChecker asyncnodegroups.AsyncNodeGroupStateChecker + // ScaleUpEnforcer can force 
scale up even if all pods are new or MaxNodesTotal was achieved. + ScaleUpEnforcer pods.ScaleUpEnforcer } // DefaultProcessors returns default set of processors. @@ -100,6 +102,7 @@ func DefaultProcessors(options config.AutoscalingOptions) *AutoscalingProcessors TemplateNodeInfoProvider: nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false), ScaleDownCandidatesNotifier: scaledowncandidates.NewObserversList(), ScaleStateNotifier: nodegroupchange.NewNodeGroupChangeObserversList(), + ScaleUpEnforcer: pods.NewDefaultScaleUpEnforcer(), } } diff --git a/cluster-autoscaler/processors/provreq/pods_filter.go b/cluster-autoscaler/processors/provreq/pods_filter.go index a2d0d4a3d664..f2592ba0a6d9 100644 --- a/cluster-autoscaler/processors/provreq/pods_filter.go +++ b/cluster-autoscaler/processors/provreq/pods_filter.go @@ -22,7 +22,7 @@ import ( apiv1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1" + v1 "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1" "k8s.io/autoscaler/cluster-autoscaler/context" "k8s.io/autoscaler/cluster-autoscaler/processors/pods" provreqpods "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/pods" diff --git a/cluster-autoscaler/processors/provreq/scaleup_enforcer.go b/cluster-autoscaler/processors/provreq/scaleup_enforcer.go new file mode 100644 index 000000000000..9592dbb073c2 --- /dev/null +++ b/cluster-autoscaler/processors/provreq/scaleup_enforcer.go @@ -0,0 +1,41 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package provreq + +import ( + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/processors/pods" +) + +// ProvisioningRequestScaleUpEnforcer forces scale up if there is any unschedulable pod that belongs to ProvisioningRequest. +type ProvisioningRequestScaleUpEnforcer struct { +} + +// NewProvisioningRequestScaleUpEnforcer creates a ProvisioningRequest scale up enforcer. +func NewProvisioningRequestScaleUpEnforcer() pods.ScaleUpEnforcer { + return &ProvisioningRequestScaleUpEnforcer{} +} + +// ShouldForceScaleUp forces scale up if there is any unschedulable pod that belongs to ProvisioningRequest. +func (p *ProvisioningRequestScaleUpEnforcer) ShouldForceScaleUp(unschedulablePods []*apiv1.Pod) bool { + for _, pod := range unschedulablePods { + if _, ok := provisioningRequestName(pod); ok { + return true + } + } + return false +} diff --git a/cluster-autoscaler/processors/provreq/scaleup_enforcer_test.go b/cluster-autoscaler/processors/provreq/scaleup_enforcer_test.go new file mode 100644 index 000000000000..a57851897662 --- /dev/null +++ b/cluster-autoscaler/processors/provreq/scaleup_enforcer_test.go @@ -0,0 +1,66 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package provreq + +import ( + "testing" + + "github.com/stretchr/testify/assert" + apiv1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1" + "k8s.io/autoscaler/cluster-autoscaler/provisioningrequest/pods" + testutils "k8s.io/autoscaler/cluster-autoscaler/utils/test" +) + +func TestProvisioningRequestScaleUpEnforcer(t *testing.T) { + prPod1 := testutils.BuildTestPod("pr-pod-1", 500, 10) + prPod1.Annotations[v1.ProvisioningRequestPodAnnotationKey] = "pr-class" + + prPod2 := testutils.BuildTestPod("pr-pod-2", 500, 10) + prPod2.Annotations[pods.DeprecatedProvisioningRequestPodAnnotationKey] = "pr-class-2" + + pod1 := testutils.BuildTestPod("pod-1", 500, 10) + pod2 := testutils.BuildTestPod("pod-2", 500, 10) + + testCases := map[string]struct { + unschedulablePods []*apiv1.Pod + want bool + }{ + "Any pod with ProvisioningRequest annotation key forces scale up": { + unschedulablePods: []*corev1.Pod{prPod1, pod1}, + want: true, + }, + "Any pod with ProvisioningRequest deprecated annotation key forces scale up": { + unschedulablePods: []*corev1.Pod{prPod2, pod1}, + want: true, + }, + "Pod without ProvisioningRequest annotation key doesn't force scale up": { + unschedulablePods: []*corev1.Pod{pod1, pod2}, + want: false, + }, + "Empty pod list doesn't force scale up": { + unschedulablePods: []*corev1.Pod{}, + want: false, + }, + } + for _, test := range testCases { + scaleUpEnforcer := NewProvisioningRequestScaleUpEnforcer() + got := scaleUpEnforcer.ShouldForceScaleUp(test.unschedulablePods) + 
assert.Equal(t, test.want, got) + } +} diff --git a/cluster-autoscaler/processors/test/common.go b/cluster-autoscaler/processors/test/common.go index 07f69c16a346..065b06d92f99 100644 --- a/cluster-autoscaler/processors/test/common.go +++ b/cluster-autoscaler/processors/test/common.go @@ -31,6 +31,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfosprovider" "k8s.io/autoscaler/cluster-autoscaler/processors/nodes" + "k8s.io/autoscaler/cluster-autoscaler/processors/pods" "k8s.io/autoscaler/cluster-autoscaler/processors/scaledowncandidates" "k8s.io/autoscaler/cluster-autoscaler/processors/status" "k8s.io/autoscaler/cluster-autoscaler/simulator/scheduling" @@ -56,5 +57,6 @@ func NewTestProcessors(context *context.AutoscalingContext) *processors.Autoscal ScaleDownCandidatesNotifier: scaledowncandidates.NewObserversList(), ScaleStateNotifier: nodegroupchange.NewNodeGroupChangeObserversList(), AsyncNodeGroupStateChecker: asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker(), + ScaleUpEnforcer: pods.NewDefaultScaleUpEnforcer(), } }