Commit e893521
Merge pull request #7667 from jbtk/max-nodes-events
Improve events when max total nodes of the cluster is reached.
2 parents: ba94506 + b8db30c
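This change makes hitting the cluster-wide node limit visible to users instead of only to the autoscaler log: when MaxNodesTotal (the --max-nodes-total flag) is reached, the static autoscaler now emits a MaxNodesTotalReached warning event and records every still-unschedulable pod in the scale-up status, so that the eventing status processor attaches a per-pod NotTriggerScaleUp event explaining that the max total nodes in the cluster was reached.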

3 files changed: +42, -3 lines

Diff for: cluster-autoscaler/core/static_autoscaler.go (+11)

@@ -528,7 +528,18 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr
 	} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
 		scaleUpStatus.Result = status.ScaleUpLimitedByMaxNodesTotal
 		klog.Warningf("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes))
+		autoscalingContext.LogRecorder.Eventf(apiv1.EventTypeWarning, "MaxNodesTotalReached",
+			"Max total nodes in cluster reached: %v", autoscalingContext.MaxNodesTotal)
 		shouldScaleUp = false
+
+		noScaleUpInfoForPods := []status.NoScaleUpInfo{}
+		for _, pod := range unschedulablePodsToHelp {
+			noScaleUpInfo := status.NoScaleUpInfo{
+				Pod: pod,
+			}
+			noScaleUpInfoForPods = append(noScaleUpInfoForPods, noScaleUpInfo)
+		}
+		scaleUpStatus.PodsRemainUnschedulable = noScaleUpInfoForPods
 	} else if len(a.BypassedSchedulers) == 0 && allPodsAreNew(unschedulablePodsToHelp, currentTime) {
 		// The assumption here is that these pods have been created very recently and probably there
 		// is more pods to come. In theory we could check the newest pod time but then if pod were created
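With this hunk, hitting the node cap leaves the scale-up status in roughly the following shape (a sketch only, assuming three pending pods named pod1..pod3; the field names are the ones used above):

	scaleUpStatus.Result = status.ScaleUpLimitedByMaxNodesTotal
	scaleUpStatus.PodsRemainUnschedulable = []status.NoScaleUpInfo{
		{Pod: pod1}, {Pod: pod2}, {Pod: pod3}, // hypothetical pending pods
	}

Each NoScaleUpInfo carries only the pod; no node groups are listed as rejected or skipped, since none were evaluated once the cluster-wide limit was hit. The per-pod event message is instead derived from the Result field by the processor changed below.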

Diff for: cluster-autoscaler/processors/status/eventing_scale_up_processor.go (+6 -2)

@@ -41,7 +41,7 @@ func (p *EventingScaleUpStatusProcessor) Process(context *context.AutoscalingCon
 		for _, noScaleUpInfo := range status.PodsRemainUnschedulable {
 			context.Recorder.Event(noScaleUpInfo.Pod, apiv1.EventTypeNormal, "NotTriggerScaleUp",
 				fmt.Sprintf("pod didn't trigger scale-up: %s",
-					ReasonsMessage(noScaleUpInfo, consideredNodeGroupsMap)))
+					ReasonsMessage(status.Result, noScaleUpInfo, consideredNodeGroupsMap)))
 		}
 	} else {
 		klog.V(4).Infof("Skipping event processing for unschedulable pods since there is a" +
@@ -60,7 +60,11 @@ func (p *EventingScaleUpStatusProcessor) CleanUp() {
 }
 
 // ReasonsMessage aggregates reasons from NoScaleUpInfos.
-func ReasonsMessage(noScaleUpInfo NoScaleUpInfo, consideredNodeGroups map[string]cloudprovider.NodeGroup) string {
+func ReasonsMessage(scaleUpStatus ScaleUpResult, noScaleUpInfo NoScaleUpInfo, consideredNodeGroups map[string]cloudprovider.NodeGroup) string {
+	if scaleUpStatus == ScaleUpLimitedByMaxNodesTotal {
+		return "max total nodes in cluster reached"
+	}
+
 	messages := []string{}
 	aggregated := map[string]int{}
 	for nodeGroupId, reasons := range noScaleUpInfo.RejectedNodeGroups {
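For callers, the signature change adds a leading ScaleUpResult argument that short-circuits the reason aggregation. A minimal sketch of the new behaviour, assuming the standard k8s.io/autoscaler/cluster-autoscaler import paths (illustrative only, not part of this commit):

	package main

	import (
		"fmt"

		"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
		"k8s.io/autoscaler/cluster-autoscaler/processors/status"
	)

	func main() {
		// No node groups were considered: the cluster-wide cap was the limiting factor.
		considered := map[string]cloudprovider.NodeGroup{}

		// With ScaleUpLimitedByMaxNodesTotal the per-node-group reasons are ignored
		// and a fixed message is returned.
		msg := status.ReasonsMessage(status.ScaleUpLimitedByMaxNodesTotal, status.NoScaleUpInfo{}, considered)
		fmt.Println(msg) // prints: max total nodes in cluster reached
	}

Any other ScaleUpResult keeps the existing behaviour of aggregating reasons from the NoScaleUpInfo's rejected and skipped node groups.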

Diff for: cluster-autoscaler/processors/status/eventing_scale_up_processor_test.go (+25 -1)

@@ -101,6 +101,21 @@ func TestEventingScaleUpStatusProcessor(t *testing.T) {
 			expectedTriggered:   0,
 			expectedNoTriggered: 0,
 		},
+		{
+			caseName: "No scale up; max total nodes in cluster reached",
+			state: &ScaleUpStatus{
+				Result:               ScaleUpLimitedByMaxNodesTotal,
+				ScaleUpInfos:         []nodegroupset.ScaleUpInfo{{}},
+				PodsTriggeredScaleUp: []*apiv1.Pod{},
+				PodsRemainUnschedulable: []NoScaleUpInfo{
+					{Pod: p1},
+					{Pod: p2},
+					{Pod: p3},
+				},
+			},
+			expectedTriggered:   0,
+			expectedNoTriggered: 3,
+		},
 	}
 
 	for _, tc := range testCases {
@@ -166,9 +181,18 @@ func TestReasonsMessage(t *testing.T) {
 		"2 max limit reached",
 		"1 not ready",
 	}
-	result := ReasonsMessage(NoScaleUpInfo{nil, rejected, skipped}, considered)
+	result := ReasonsMessage(ScaleUpNoOptionsAvailable, NoScaleUpInfo{nil, rejected, skipped}, considered)
 
 	for _, part := range expected {
 		assert.Contains(t, result, part)
 	}
 }
+
+func TestReasonsMessageWhenScaleUpLimitedByMaxNodesTotal(t *testing.T) {
+	considered := map[string]cloudprovider.NodeGroup{}
+	noScaleUpInfo := NoScaleUpInfo{
+		Pod: nil,
+	}
+	result := ReasonsMessage(ScaleUpLimitedByMaxNodesTotal, noScaleUpInfo, considered)
+	assert.Contains(t, result, "max total nodes in cluster reached")
+}
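Taken together, the user-visible effect is that pods left pending because of the node cap should now carry a NotTriggerScaleUp event with the message "pod didn't trigger scale-up: max total nodes in cluster reached" (visible, for example, with kubectl describe pod), in addition to the cluster-level MaxNodesTotalReached warning event.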
