Skip to content

Commit 6097518

Browse files
committed
cluster-autoscaler/dead-nodes: (temp) emulate TaintBasedEvictions
On our k8s v1.10 clusters (configured without the TaintBasedEvictions feature gate), recent CA versions fail to consider dead nodes (unknown status) as unschedulable. The likely reason is that newer CA versions rely on the /unreachable taint (provided by TaintBasedEvictions) for the node to be considered unschedulable. Our best medium-term fix will be to enable the TaintBasedEvictions feature gate on 1.10 clusters. This will require a sufficient testing period, so until then, this (hopefully) temporary patch should cover the case.
1 parent 9cf9a8f commit 6097518

File tree

1 file changed

+36
-2
lines changed

1 file changed

+36
-2
lines changed

cluster-autoscaler/core/filter_out_schedulable.go

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"k8s.io/autoscaler/cluster-autoscaler/core/utils"
2727
"k8s.io/autoscaler/cluster-autoscaler/metrics"
2828
"k8s.io/autoscaler/cluster-autoscaler/simulator"
29+
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
2930

3031
apiv1 "k8s.io/api/core/v1"
3132
klog "k8s.io/klog/v2"
@@ -143,7 +144,8 @@ func (p *filterOutSchedulablePodListProcessor) filterOutSchedulableByPacking(
143144
for _, pod := range unschedulableCandidates {
144145
scheduledOnHintedNode := false
145146
if hintedNodeName, hintFound := p.schedulablePodsNodeHints[pod.UID]; hintFound {
146-
if predicateChecker.CheckPredicates(clusterSnapshot, pod, hintedNodeName) == nil {
147+
nodeInfo, _ := clusterSnapshot.NodeInfos().Get(hintedNodeName)
148+
if predicateChecker.CheckPredicates(clusterSnapshot, pod, hintedNodeName) == nil && isLivingNode(nodeInfo) {
147149
// We treat predicate error and missing node error here in the same way
148150
scheduledOnHintedNode = true
149151
podsFilteredUsingHints++
@@ -184,7 +186,9 @@ func (p *filterOutSchedulablePodListProcessor) filterOutSchedulableByPacking(
184186
unschedulePodsCacheHitCounter++
185187
continue
186188
}
187-
nodeName, err := predicateChecker.FitsAnyNode(clusterSnapshot, pod)
189+
nodeName, err := predicateChecker.FitsAnyNodeMatching(clusterSnapshot, pod, func(nodeInfo *schedulerframework.NodeInfo) bool {
190+
return isLivingNode(nodeInfo)
191+
})
188192
if err == nil {
189193
klog.V(4).Infof("Pod %s.%s marked as unschedulable can be scheduled on node %s. Ignoring"+
190194
" in scale up.", pod.Namespace, pod.Name, nodeName)
@@ -204,6 +208,36 @@ func (p *filterOutSchedulablePodListProcessor) filterOutSchedulableByPacking(
204208
return unschedulablePods, nil
205209
}
206210

211+
// filter out dead nodes (having "unknown" NodeReady condition for over 10mn), so we can ignore them if hinted.
212+
// Needed for 1.10 clusters, until we set TaintBasedEvictions feature gate to "true" there (already enabled
213+
// by default on clusters using k8s v1.14 and up): TaintBasedEvictions places a node.kubernetes.io/unreachable
214+
// taint on dead nodes, that helps the CA to consider them unschedulable (unless explicitely tolerated).
215+
func isLivingNode(nodeInfo *schedulerframework.NodeInfo) bool {
216+
if nodeInfo == nil {
217+
// we only care about filtering out nodes having "unknown" status.
218+
return true
219+
}
220+
221+
node := nodeInfo.Node()
222+
if node == nil && node.Status.Conditions == nil {
223+
return true
224+
}
225+
226+
for _, cond := range node.Status.Conditions {
227+
if cond.Type != apiv1.NodeReady {
228+
continue
229+
}
230+
if cond.Status != apiv1.ConditionUnknown {
231+
continue
232+
}
233+
if cond.LastTransitionTime.Time.Add(10 * time.Minute).Before(time.Now()) {
234+
return false
235+
}
236+
}
237+
238+
return true
239+
}
240+
207241
func moreImportantPod(pod1, pod2 *apiv1.Pod) bool {
208242
// based on schedulers MoreImportantPod but does not compare Pod.Status.StartTime which does not make sense
209243
// for unschedulable pods

0 commit comments

Comments
 (0)