@@ -1130,6 +1130,11 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
11301130 pool := machineconfigpool .DeepCopy ()
11311131 everything := metav1.LabelSelector {}
11321132
1133+ // If arbiter pool, requeue master pool update and only sync status
1134+ if pool .Name == ctrlcommon .MachineConfigPoolArbiter {
1135+ return ctrl .handleArbiterPoolEvent (pool )
1136+ }
1137+
11331138 if reflect .DeepEqual (pool .Spec .NodeSelector , & everything ) {
11341139 ctrl .eventRecorder .Eventf (pool , corev1 .EventTypeWarning , "SelectingAll" , "This machineconfigpool is selecting all nodes. A non-empty selector is required." )
11351140 return nil
@@ -1183,7 +1188,50 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
11831188 return err
11841189 }
11851190
1186- if err := ctrl .setClusterConfigAnnotation (nodes ); err != nil {
1191+ cc , err := ctrl .ccLister .Get (ctrlcommon .ControllerConfigName )
1192+ if err != nil {
1193+ return fmt .Errorf ("error getting controllerconfig %q, error: %w" , ctrlcommon .ControllerConfigName , err )
1194+ }
1195+ controlPlaneTopology := cc .Spec .Infra .Status .ControlPlaneTopology
1196+
1197+ // For master pool in HighlyAvailableArbiterMode, coordinate with arbiter pool
1198+ var arbiterPool * mcfgv1.MachineConfigPool
1199+ var arbiterNodes []* corev1.Node
1200+ var arbiterMosc * mcfgv1.MachineOSConfig
1201+ var arbiterMosb * mcfgv1.MachineOSBuild
1202+ var arbiterLayered bool
1203+ if pool .Name == ctrlcommon .MachineConfigPoolMaster && controlPlaneTopology == configv1 .HighlyAvailableArbiterMode {
1204+ arbiterObj , err := ctrl .mcpLister .Get (ctrlcommon .MachineConfigPoolArbiter )
1205+ if err != nil {
1206+ return fmt .Errorf ("error getting arbiter pool %q, error: %w" , ctrlcommon .MachineConfigPoolArbiter , err )
1207+ }
1208+ if arbiterObj .Spec .Configuration .Name != "" && arbiterObj .DeletionTimestamp == nil && ! arbiterObj .Spec .Paused {
1209+ arbiterPool = arbiterObj .DeepCopy ()
1210+ arbiterNodes , err = ctrl .getNodesForPool (arbiterPool )
1211+ if err != nil {
1212+ return fmt .Errorf ("error getting nodes for arbiter pool %q, error: %w" , ctrlcommon .MachineConfigPoolArbiter , err )
1213+ }
1214+ arbiterMosc , arbiterMosb , arbiterLayered , err = ctrl .getConfigAndBuildAndLayeredStatus (arbiterPool )
1215+ if err != nil {
1216+ return fmt .Errorf ("error getting config and build for arbiter pool %q, error: %w" , ctrlcommon .MachineConfigPoolArbiter , err )
1217+ }
1218+ combinedNodes := append ([]* corev1.Node {}, nodes ... )
1219+ combinedNodes = append (combinedNodes , arbiterNodes ... )
1220+ combinedMax , err := maxUnavailable (pool , combinedNodes )
1221+ if err != nil {
1222+ return fmt .Errorf ("error getting max unavailable count for pool %q, error: %w" , pool .Name , err )
1223+ }
1224+ arbiterUnavailable := len (getUnavailableMachines (arbiterNodes , arbiterPool ))
1225+ // Adjust maxunavail to account for arbiter unavailable nodes
1226+ // This ensures we don't exceed the combined maxUnavailable across both pools
1227+ maxunavail = combinedMax - arbiterUnavailable
1228+ if maxunavail < 0 {
1229+ maxunavail = 0
1230+ }
1231+ }
1232+ }
1233+
1234+ if err := ctrl .setClusterConfigAnnotation (nodes , controlPlaneTopology ); err != nil {
11871235 return fmt .Errorf ("error setting clusterConfig Annotation for node in pool %q, error: %w" , pool .Name , err )
11881236 }
11891237 // Taint all the nodes in the node pool, irrespective of their upgrade status.
@@ -1214,6 +1262,7 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
12141262 }
12151263 }
12161264 candidates , capacity := getAllCandidateMachines (layered , mosc , mosb , pool , nodes , maxunavail )
1265+ masterTargeted := 0
12171266 if len (candidates ) > 0 {
12181267 zones := make (map [string ]bool )
12191268 for _ , candidate := range candidates {
@@ -1230,8 +1279,107 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
12301279 }
12311280 return err
12321281 }
1282+ masterTargeted = len (candidates )
12331283 ctrlcommon .UpdateStateMetric (ctrlcommon .MCCSubControllerState , "machine-config-controller-node" , "Sync Machine Config Pool" , pool .Name )
12341284 }
1285+
1286+ // If coordinating with arbiter pool, also handle arbiter node updates
1287+ if arbiterPool != nil && len (arbiterNodes ) > 0 {
1288+ // Set cluster config annotation for arbiter nodes
1289+ if err := ctrl .setClusterConfigAnnotation (arbiterNodes , controlPlaneTopology ); err != nil {
1290+ return fmt .Errorf ("error setting clusterConfig Annotation for node in pool %q, error: %w" , arbiterPool .Name , err )
1291+ }
1292+
1293+ // Handle taints for arbiter nodes
1294+ for _ , node := range arbiterNodes {
1295+ hasInProgressTaint := checkIfNodeHasInProgressTaint (node )
1296+ lns := ctrlcommon .NewLayeredNodeState (node )
1297+ if (! arbiterLayered && lns .IsDesiredMachineConfigEqualToPool (arbiterPool ) && ! lns .AreImageAnnotationsPresentOnNode ()) || (arbiterLayered && lns .IsDesiredEqualToBuild (arbiterMosc , arbiterMosb )) {
1298+ if hasInProgressTaint {
1299+ if err := ctrl .removeUpdateInProgressTaint (ctx , node .Name ); err != nil {
1300+ err = fmt .Errorf ("failed removing %s taint for node %s: %w" , constants .NodeUpdateInProgressTaint .Key , node .Name , err )
1301+ klog .Error (err )
1302+ }
1303+ }
1304+ } else {
1305+ if ! hasInProgressTaint {
1306+ if err := ctrl .setUpdateInProgressTaint (ctx , node .Name ); err != nil {
1307+ err = fmt .Errorf ("failed applying %s taint for node %s: %w" , constants .NodeUpdateInProgressTaint .Key , node .Name , err )
1308+ klog .Error (err )
1309+ }
1310+ }
1311+ }
1312+ }
1313+
1314+ // Calculate remaining capacity for arbiter after master updates
1315+ masterUnavailable := len (getUnavailableMachines (nodes , pool ))
1316+ arbiterUnavailable := len (getUnavailableMachines (arbiterNodes , arbiterPool ))
1317+ combinedNodes := append ([]* corev1.Node {}, nodes ... )
1318+ combinedNodes = append (combinedNodes , arbiterNodes ... )
1319+ combinedMax , err := maxUnavailable (pool , combinedNodes )
1320+ if err == nil {
1321+ remainingCapacity := combinedMax - masterUnavailable - masterTargeted - arbiterUnavailable
1322+ if remainingCapacity < 0 {
1323+ remainingCapacity = 0
1324+ }
1325+ arbiterMaxUnavail := arbiterUnavailable + remainingCapacity
1326+ if arbiterMaxUnavail < 0 {
1327+ arbiterMaxUnavail = 0
1328+ }
1329+
1330+ arbiterCandidates , arbiterCapacity := getAllCandidateMachines (arbiterLayered , arbiterMosc , arbiterMosb , arbiterPool , arbiterNodes , arbiterMaxUnavail )
1331+ if len (arbiterCandidates ) > 0 {
1332+ zones := make (map [string ]bool )
1333+ for _ , candidate := range arbiterCandidates {
1334+ if zone , ok := candidate .Labels [zoneLabel ]; ok {
1335+ zones [zone ] = true
1336+ }
1337+ }
1338+ ctrl .logPool (arbiterPool , "%d candidate nodes in %d zones for update, capacity: %d" , len (arbiterCandidates ), len (zones ), arbiterCapacity )
1339+ if err := ctrl .updateCandidateMachines (arbiterLayered , arbiterMosc , arbiterMosb , arbiterPool , arbiterCandidates , arbiterCapacity ); err != nil {
1340+ if syncErr := ctrl .syncStatusOnly (arbiterPool ); syncErr != nil {
1341+ errs := kubeErrs .NewAggregate ([]error {syncErr , err })
1342+ return fmt .Errorf ("error setting annotations for pool %q, sync error: %w" , arbiterPool .Name , errs )
1343+ }
1344+ return err
1345+ }
1346+ ctrlcommon .UpdateStateMetric (ctrlcommon .MCCSubControllerState , "machine-config-controller-node" , "Sync Machine Config Pool" , arbiterPool .Name )
1347+ }
1348+
1349+ // Sync status for arbiter pool
1350+ if err := ctrl .syncStatusOnly (arbiterPool ); err != nil {
1351+ return err
1352+ }
1353+ }
1354+ }
1355+
1356+ return ctrl .syncStatusOnly (pool )
1357+ }
1358+
1359+ func (ctrl * Controller ) handleArbiterPoolEvent (pool * mcfgv1.MachineConfigPool ) error {
1360+ masterPool , err := ctrl .mcpLister .Get (ctrlcommon .MachineConfigPoolMaster )
1361+ if err == nil {
1362+ ctrl .enqueue (masterPool )
1363+ } else if ! errors .IsNotFound (err ) {
1364+ return err
1365+ }
1366+ // Still sync status for arbiter pool
1367+ if pool .DeletionTimestamp != nil || pool .Spec .Paused {
1368+ return ctrl .syncStatusOnly (pool )
1369+ }
1370+ mosc , mosb , layered , err := ctrl .getConfigAndBuildAndLayeredStatus (pool )
1371+ if err != nil {
1372+ return fmt .Errorf ("could not get config and build: %w" , err )
1373+ }
1374+ if layered {
1375+ _ , canApplyUpdates , err := ctrl .canLayeredContinue (mosc , mosb )
1376+ if err != nil {
1377+ return err
1378+ }
1379+ if ! canApplyUpdates {
1380+ return ctrl .syncStatusOnly (pool )
1381+ }
1382+ }
12351383 return ctrl .syncStatusOnly (pool )
12361384}
12371385
@@ -1277,17 +1425,12 @@ func (ctrl *Controller) getNodesForPool(pool *mcfgv1.MachineConfigPool) ([]*core
12771425// setClusterConfigAnnotation reads cluster configs set into controllerConfig
12781426// and add/updates required annotation to node such as ControlPlaneTopology
12791427// from infrastructure object.
1280- func (ctrl * Controller ) setClusterConfigAnnotation (nodes []* corev1.Node ) error {
1281- cc , err := ctrl .ccLister .Get (ctrlcommon .ControllerConfigName )
1282- if err != nil {
1283- return err
1284- }
1285-
1428+ func (ctrl * Controller ) setClusterConfigAnnotation (nodes []* corev1.Node , controlPlaneTopology configv1.TopologyMode ) error {
12861429 for _ , node := range nodes {
1287- if node .Annotations [daemonconsts .ClusterControlPlaneTopologyAnnotationKey ] != string (cc . Spec . Infra . Status . ControlPlaneTopology ) {
1430+ if node .Annotations [daemonconsts .ClusterControlPlaneTopologyAnnotationKey ] != string (controlPlaneTopology ) {
12881431 oldAnn := node .Annotations [daemonconsts .ClusterControlPlaneTopologyAnnotationKey ]
12891432 _ , err := internal .UpdateNodeRetry (ctrl .kubeClient .CoreV1 ().Nodes (), ctrl .nodeLister , node .Name , func (node * corev1.Node ) {
1290- node .Annotations [daemonconsts .ClusterControlPlaneTopologyAnnotationKey ] = string (cc . Spec . Infra . Status . ControlPlaneTopology )
1433+ node .Annotations [daemonconsts .ClusterControlPlaneTopologyAnnotationKey ] = string (controlPlaneTopology )
12911434 })
12921435 if err != nil {
12931436 return err
0 commit comments