161 changes: 152 additions & 9 deletions pkg/controller/node/node_controller.go
@@ -1130,6 +1130,11 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
pool := machineconfigpool.DeepCopy()
everything := metav1.LabelSelector{}

// If arbiter pool, requeue master pool update and only sync status
if pool.Name == ctrlcommon.MachineConfigPoolArbiter {
return ctrl.handleArbiterPoolEvent(pool)
}

if reflect.DeepEqual(pool.Spec.NodeSelector, &everything) {
ctrl.eventRecorder.Eventf(pool, corev1.EventTypeWarning, "SelectingAll", "This machineconfigpool is selecting all nodes. A non-empty selector is required.")
return nil
@@ -1183,7 +1188,50 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
return err
}

if err := ctrl.setClusterConfigAnnotation(nodes); err != nil {
cc, err := ctrl.ccLister.Get(ctrlcommon.ControllerConfigName)
if err != nil {
return fmt.Errorf("error getting controllerconfig %q, error: %w", ctrlcommon.ControllerConfigName, err)
}
controlPlaneTopology := cc.Spec.Infra.Status.ControlPlaneTopology

// For master pool in HighlyAvailableArbiterMode, coordinate with arbiter pool
var arbiterPool *mcfgv1.MachineConfigPool
var arbiterNodes []*corev1.Node
var arbiterMosc *mcfgv1.MachineOSConfig
var arbiterMosb *mcfgv1.MachineOSBuild
var arbiterLayered bool
if pool.Name == ctrlcommon.MachineConfigPoolMaster && controlPlaneTopology == configv1.HighlyAvailableArbiterMode {
arbiterObj, err := ctrl.mcpLister.Get(ctrlcommon.MachineConfigPoolArbiter)
if err != nil {
return fmt.Errorf("error getting arbiter pool %q, error: %w", ctrlcommon.MachineConfigPoolArbiter, err)
}
if arbiterObj.Spec.Configuration.Name != "" && arbiterObj.DeletionTimestamp == nil && !arbiterObj.Spec.Paused {
arbiterPool = arbiterObj.DeepCopy()
arbiterNodes, err = ctrl.getNodesForPool(arbiterPool)
if err != nil {
return fmt.Errorf("error getting nodes for arbiter pool %q, error: %w", ctrlcommon.MachineConfigPoolArbiter, err)
}
arbiterMosc, arbiterMosb, arbiterLayered, err = ctrl.getConfigAndBuildAndLayeredStatus(arbiterPool)
if err != nil {
return fmt.Errorf("error getting config and build for arbiter pool %q, error: %w", ctrlcommon.MachineConfigPoolArbiter, err)
}
combinedNodes := append([]*corev1.Node{}, nodes...)
combinedNodes = append(combinedNodes, arbiterNodes...)
combinedMax, err := maxUnavailable(pool, combinedNodes)
Contributor: So, this basically implies that the arbiter pool's maxUnavailable no longer has an effect. Maybe we should not allow setting that field at all.

(In practice nobody should be fiddling with maxUnavailable for either masters or arbiters, so it probably doesn't matter, just wanted to note it just in case.)
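A minimal standalone sketch of that point (not the MCO implementation; the percentage handling and the floor of one are assumptions about how maxUnavailable behaves): once the combined node list is passed in, only the master pool's spec value is ever resolved, so the arbiter pool's own field never comes into play.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
)

func main() {
	// Assume the master pool's spec.maxUnavailable is "33%" and it is resolved
	// against 3 masters + 1 arbiter. The arbiter pool's own maxUnavailable is
	// never consulted on this path.
	masterMaxUnavailable := intstr.FromString("33%")
	combinedNodeCount := 4

	combinedMax, err := intstr.GetScaledValueFromIntOrPercent(&masterMaxUnavailable, combinedNodeCount, false)
	if err != nil {
		panic(err)
	}
	if combinedMax < 1 {
		combinedMax = 1 // assumed floor so updates can always make progress
	}
	fmt.Println("combined maxUnavailable:", combinedMax) // 1
}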

if err != nil {
return fmt.Errorf("error getting max unavailable count for pool %q, error: %w", pool.Name, err)
}
arbiterUnavailable := len(getUnavailableMachines(arbiterNodes, arbiterPool))
// Adjust maxunavail to account for arbiter unavailable nodes
// This ensures we don't exceed the combined maxUnavailable across both pools
maxunavail = combinedMax - arbiterUnavailable
Contributor: the previously set maxunavail should just be a value set in the pool, and the candidate selection below filters for any in-progress or not-ready nodes. Given that we have some complex logic below, is it needed to subtract arbiter here?
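A worked illustration with hypothetical numbers (not from the PR): with 3 masters plus 1 arbiter and a master-pool maxUnavailable of 1, combinedMax = 1. If the arbiter node is mid-update, arbiterUnavailable = 1 and maxunavail = 1 - 1 = 0, so no master candidates are selected on this sync; once the arbiter settles, maxunavail returns to 1. The subtraction therefore only changes behaviour while arbiter nodes are actively updating.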

if maxunavail < 0 {
maxunavail = 0
}
}
}

if err := ctrl.setClusterConfigAnnotation(nodes, controlPlaneTopology); err != nil {
return fmt.Errorf("error setting clusterConfig Annotation for node in pool %q, error: %w", pool.Name, err)
}
// Taint all the nodes in the node pool, irrespective of their upgrade status.
@@ -1214,6 +1262,7 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
}
}
candidates, capacity := getAllCandidateMachines(layered, mosc, mosb, pool, nodes, maxunavail)
masterTargeted := 0
if len(candidates) > 0 {
zones := make(map[string]bool)
for _, candidate := range candidates {
@@ -1230,8 +1279,107 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
}
return err
}
masterTargeted = len(candidates)
ctrlcommon.UpdateStateMetric(ctrlcommon.MCCSubControllerState, "machine-config-controller-node", "Sync Machine Config Pool", pool.Name)
}

// If coordinating with arbiter pool, also handle arbiter node updates
if arbiterPool != nil && len(arbiterNodes) > 0 {
Contributor: Should this whole section also be gated via if pool.Name == ctrlcommon.MachineConfigPoolMaster && controlPlaneTopology == configv1.HighlyAvailableArbiterMode, similar to above? It seems possible that we'd be syncing the arbiter pool during a worker sync, which can happen in parallel with masters and might be unsafe.

// Set cluster config annotation for arbiter nodes
if err := ctrl.setClusterConfigAnnotation(arbiterNodes, controlPlaneTopology); err != nil {
Contributor: Wondering if we could combine this with the same function call earlier somehow, or move it closer (although I guess functionally speaking it shouldn't matter).

There's quite a bit of duplicated code in general. I guess the main reason we can't just merge into the above functions is that arbiter has a different desiredConfig annotation it would need to set? Would it be easier if we modified the updateCandidateMachines function to account for that and treat the arbiter node as a master node in this function?
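As a hypothetical illustration of the merge suggested above (illustrative helper, not code from this PR): select candidates once over the combined node list and branch on the node role only when choosing which pool's desired config to write. A standalone sketch:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const arbiterRoleLabel = "node-role.kubernetes.io/arbiter"

// splitCandidatesByRole is an illustrative helper, not an existing MCO function.
// The idea: candidates are selected together (arbiter nodes treated like masters),
// and only the annotation-writing step picks the arbiter pool's rendered config
// for nodes carrying the arbiter role label.
func splitCandidatesByRole(candidates []*corev1.Node) (masters, arbiters []*corev1.Node) {
	for _, node := range candidates {
		if _, ok := node.Labels[arbiterRoleLabel]; ok {
			arbiters = append(arbiters, node)
		} else {
			masters = append(masters, node)
		}
	}
	return masters, arbiters
}

func main() {
	candidates := []*corev1.Node{
		{ObjectMeta: metav1.ObjectMeta{Name: "master-0", Labels: map[string]string{"node-role.kubernetes.io/master": ""}}},
		{ObjectMeta: metav1.ObjectMeta{Name: "arbiter-0", Labels: map[string]string{arbiterRoleLabel: ""}}},
	}
	masters, arbiters := splitCandidatesByRole(candidates)
	fmt.Printf("%d master candidate(s), %d arbiter candidate(s)\n", len(masters), len(arbiters))
}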

return fmt.Errorf("error setting clusterConfig Annotation for node in pool %q, error: %w", arbiterPool.Name, err)
}

// Handle taints for arbiter nodes
for _, node := range arbiterNodes {
hasInProgressTaint := checkIfNodeHasInProgressTaint(node)
lns := ctrlcommon.NewLayeredNodeState(node)
if (!arbiterLayered && lns.IsDesiredMachineConfigEqualToPool(arbiterPool) && !lns.AreImageAnnotationsPresentOnNode()) || (arbiterLayered && lns.IsDesiredEqualToBuild(arbiterMosc, arbiterMosb)) {
if hasInProgressTaint {
if err := ctrl.removeUpdateInProgressTaint(ctx, node.Name); err != nil {
err = fmt.Errorf("failed removing %s taint for node %s: %w", constants.NodeUpdateInProgressTaint.Key, node.Name, err)
klog.Error(err)
}
}
} else {
if !hasInProgressTaint {
if err := ctrl.setUpdateInProgressTaint(ctx, node.Name); err != nil {
err = fmt.Errorf("failed applying %s taint for node %s: %w", constants.NodeUpdateInProgressTaint.Key, node.Name, err)
klog.Error(err)
}
}
}
}

// Calculate remaining capacity for arbiter after master updates
masterUnavailable := len(getUnavailableMachines(nodes, pool))
arbiterUnavailable := len(getUnavailableMachines(arbiterNodes, arbiterPool))
combinedNodes := append([]*corev1.Node{}, nodes...)
combinedNodes = append(combinedNodes, arbiterNodes...)
combinedMax, err := maxUnavailable(pool, combinedNodes)
if err == nil {
remainingCapacity := combinedMax - masterUnavailable - masterTargeted - arbiterUnavailable
Contributor: I'm having trouble following the logic used for this calculation. Given that we calculated the capacity (including arbiter) earlier, and tracked the number of masters being updated in this round, wouldn't it just be capacity - masterTargeted? Would it be possible to simplify this logic somehow? It feels like a lot of potentially unnecessary calculation on top of all the duplication, and it would be hard to maintain in the future. I still prefer somehow merging arbiter into the master calculation and keeping most of the function unduplicated, just setting the desired annotation differently if an arbiter node gets selected. Ultimately this function is designed to update some nodes' annotations to the new desired config, and I'm hoping we can keep the core functionality the same without special-casing too much.
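A quick check with hypothetical numbers (not from the PR, and assuming capacity returned by getAllCandidateMachines is maxunavail minus currently unavailable masters): 3 masters + 1 arbiter, combinedMax = 1, nothing unavailable before the sync. Then maxunavail = 1, capacity = 1 and masterTargeted = 1, so capacity - masterTargeted = 0, while the longhand remainingCapacity = 1 - 0 - 1 - 0 = 0 as well; the two agree whenever the budget used here matches the combined one computed earlier.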

if remainingCapacity < 0 {
remainingCapacity = 0
}
arbiterMaxUnavail := arbiterUnavailable + remainingCapacity
if arbiterMaxUnavail < 0 {
arbiterMaxUnavail = 0
}

arbiterCandidates, arbiterCapacity := getAllCandidateMachines(arbiterLayered, arbiterMosc, arbiterMosb, arbiterPool, arbiterNodes, arbiterMaxUnavail)
if len(arbiterCandidates) > 0 {
zones := make(map[string]bool)
for _, candidate := range arbiterCandidates {
if zone, ok := candidate.Labels[zoneLabel]; ok {
zones[zone] = true
}
}
ctrl.logPool(arbiterPool, "%d candidate nodes in %d zones for update, capacity: %d", len(arbiterCandidates), len(zones), arbiterCapacity)
if err := ctrl.updateCandidateMachines(arbiterLayered, arbiterMosc, arbiterMosb, arbiterPool, arbiterCandidates, arbiterCapacity); err != nil {
if syncErr := ctrl.syncStatusOnly(arbiterPool); syncErr != nil {
errs := kubeErrs.NewAggregate([]error{syncErr, err})
return fmt.Errorf("error setting annotations for pool %q, sync error: %w", arbiterPool.Name, errs)
}
return err
}
ctrlcommon.UpdateStateMetric(ctrlcommon.MCCSubControllerState, "machine-config-controller-node", "Sync Machine Config Pool", arbiterPool.Name)
}

// Sync status for arbiter pool
if err := ctrl.syncStatusOnly(arbiterPool); err != nil {
return err
}
}
}

return ctrl.syncStatusOnly(pool)
}

func (ctrl *Controller) handleArbiterPoolEvent(pool *mcfgv1.MachineConfigPool) error {
masterPool, err := ctrl.mcpLister.Get(ctrlcommon.MachineConfigPoolMaster)
if err == nil {
ctrl.enqueue(masterPool)
} else if !errors.IsNotFound(err) {
return err
}
// Still sync status for arbiter pool
if pool.DeletionTimestamp != nil || pool.Spec.Paused {
return ctrl.syncStatusOnly(pool)
}
mosc, mosb, layered, err := ctrl.getConfigAndBuildAndLayeredStatus(pool)
if err != nil {
return fmt.Errorf("could not get config and build: %w", err)
}
if layered {
_, canApplyUpdates, err := ctrl.canLayeredContinue(mosc, mosb)
if err != nil {
return err
}
if !canApplyUpdates {
return ctrl.syncStatusOnly(pool)
}
}
return ctrl.syncStatusOnly(pool)
}

@@ -1277,17 +1425,12 @@ func (ctrl *Controller) getNodesForPool(pool *mcfgv1.MachineConfigPool) ([]*core
// setClusterConfigAnnotation adds or updates the required cluster-config
// annotations on the given nodes, such as the ControlPlaneTopology annotation
// derived from the infrastructure object.
func (ctrl *Controller) setClusterConfigAnnotation(nodes []*corev1.Node) error {
cc, err := ctrl.ccLister.Get(ctrlcommon.ControllerConfigName)
if err != nil {
return err
}

func (ctrl *Controller) setClusterConfigAnnotation(nodes []*corev1.Node, controlPlaneTopology configv1.TopologyMode) error {
for _, node := range nodes {
if node.Annotations[daemonconsts.ClusterControlPlaneTopologyAnnotationKey] != string(cc.Spec.Infra.Status.ControlPlaneTopology) {
if node.Annotations[daemonconsts.ClusterControlPlaneTopologyAnnotationKey] != string(controlPlaneTopology) {
oldAnn := node.Annotations[daemonconsts.ClusterControlPlaneTopologyAnnotationKey]
_, err := internal.UpdateNodeRetry(ctrl.kubeClient.CoreV1().Nodes(), ctrl.nodeLister, node.Name, func(node *corev1.Node) {
node.Annotations[daemonconsts.ClusterControlPlaneTopologyAnnotationKey] = string(cc.Spec.Infra.Status.ControlPlaneTopology)
node.Annotations[daemonconsts.ClusterControlPlaneTopologyAnnotationKey] = string(controlPlaneTopology)
})
if err != nil {
return err
71 changes: 71 additions & 0 deletions pkg/controller/node/node_controller_test.go
@@ -1772,3 +1772,74 @@ func filterLastTransitionTime(obj runtime.Object) runtime.Object {
}
return o
}

func TestArbiterPoolCoordination(t *testing.T) {
t.Parallel()
f := newFixture(t)

// Create controller config with HighlyAvailableArbiterMode
cc := newControllerConfig(ctrlcommon.ControllerConfigName, configv1.HighlyAvailableArbiterMode)
f.ccLister = append(f.ccLister, cc)
f.objects = append(f.objects, cc)

// Create master pool with new config
masterPool := helpers.NewMachineConfigPool(ctrlcommon.MachineConfigPoolMaster, nil, helpers.MasterSelector, machineConfigV1)
masterPool.Spec.Configuration.Name = machineConfigV2
f.mcpLister = append(f.mcpLister, masterPool)
f.objects = append(f.objects, masterPool)

// Create arbiter pool with new config
arbiterSelector := metav1.AddLabelToSelector(&metav1.LabelSelector{}, "node-role.kubernetes.io/arbiter", "")
arbiterPool := helpers.NewMachineConfigPool(ctrlcommon.MachineConfigPoolArbiter, nil, arbiterSelector, machineConfigV1)
arbiterPool.Spec.Configuration.Name = machineConfigV2
f.mcpLister = append(f.mcpLister, arbiterPool)
f.objects = append(f.objects, arbiterPool)

// Create master node with correct label format
masterNode := helpers.NewNodeWithReady("master-node-0", machineConfigV1, machineConfigV1, corev1.ConditionTrue)
masterNode.Labels = map[string]string{
"node-role/master": "",
}
f.nodeLister = append(f.nodeLister, masterNode)
f.kubeobjects = append(f.kubeobjects, masterNode)

// Create arbiter node
arbiterNode := helpers.NewNodeWithReady("arbiter-node-0", machineConfigV1, machineConfigV1, corev1.ConditionTrue)
arbiterNode.Labels = map[string]string{
"node-role.kubernetes.io/arbiter": "",
}
f.nodeLister = append(f.nodeLister, arbiterNode)
f.kubeobjects = append(f.kubeobjects, arbiterNode)

// Test: When master pool syncs in arbiter mode, it should coordinate both pools
// Expect status updates for both pools (arbiter first, then master)
f.expectUpdateMachineConfigPoolStatus(arbiterPool)
f.expectUpdateMachineConfigPoolStatus(masterPool)

// Sync master pool - this should coordinate both pools
c := f.newController()
err := c.syncHandler(ctrlcommon.MachineConfigPoolMaster)
require.NoError(t, err)

// Verify that both pools had their status updated
actions := filterInformerActions(f.client.Actions())
statusUpdates := 0
for _, action := range actions {
if action.Matches("update", "machineconfigpools") && action.GetSubresource() == "status" {
statusUpdates++
}
}
// Should have status updates for both master and arbiter pools
assert.GreaterOrEqual(t, statusUpdates, 2, "Expected at least 2 status updates (master and arbiter pools)")

// Verify that both nodes were patched (for desired config)
k8sActions := filterInformerActions(f.kubeclient.Actions())
nodePatches := 0
for _, action := range k8sActions {
if action.Matches("patch", "nodes") {
nodePatches++
}
}
// Should have patches for both master and arbiter nodes
assert.GreaterOrEqual(t, nodePatches, 2, "Expected at least 2 node patches (master and arbiter nodes)")
}