@@ -104,11 +104,26 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
104104 // Emit the update run status metric based on status conditions in the updateRun.
105105 defer emitUpdateRunStatusMetric (updateRun )
106106
107+ // Early check for abandoned state - this is a terminal state, no initialization needed.
108+ state := updateRun .GetUpdateRunSpec ().State
109+ if state == placementv1beta1 .StateAbandoned {
110+ klog .V (2 ).InfoS ("The updateRun is abandoned, terminating" , "state" , state , "updateRun" , runObjRef )
111+ return runtime.Result {}, r .recordUpdateRunAbandoned (ctx , updateRun )
112+ } else if state == placementv1beta1 .StateStopped { // Early check for stopped state - pause the update run if needed.
113+ klog .V (2 ).InfoS ("The updateRun is stopped, waiting to resume" , "state" , state , "updateRun" , runObjRef )
114+ return runtime.Result {}, r .recordUpdateRunPaused (ctx , updateRun )
115+ }
116+
107117 var updatingStageIndex int
108118 var toBeUpdatedBindings , toBeDeletedBindings []placementv1beta1.BindingObj
109119 updateRunStatus := updateRun .GetUpdateRunStatus ()
110120 initCond := meta .FindStatusCondition (updateRunStatus .Conditions , string (placementv1beta1 .StagedUpdateRunConditionInitialized ))
111- if ! condition .IsConditionStatusTrue (initCond , updateRun .GetGeneration ()) {
121+ // Check if initialized regardless of generation.
122+ // The updateRun spec fields are immutable except for the state field. When the state changes,
123+ // the update run generation increments, but we don't need to reinitialize since initialization is a one-time setup.
124+ isInitialized := initCond != nil && initCond .Status == metav1 .ConditionTrue
125+ if ! isInitialized {
126+ // Check if initialization failed for the current generation.
112127 if condition .IsConditionStatusFalse (initCond , updateRun .GetGeneration ()) {
113128 klog .V (2 ).InfoS ("The updateRun has failed to initialize" , "errorMsg" , initCond .Message , "updateRun" , runObjRef )
114129 return runtime.Result {}, nil
@@ -122,7 +137,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
122137 }
123138 return runtime.Result {}, initErr
124139 }
125- updatingStageIndex = 0 // start from the first stage.
140+ updatingStageIndex = 0 // start from the first stage (typically for NotStarted or Started states) .
126141 klog .V (2 ).InfoS ("Initialized the updateRun" , "updateRun" , runObjRef )
127142 } else {
128143 klog .V (2 ).InfoS ("The updateRun is initialized" , "updateRun" , runObjRef )
@@ -134,6 +149,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
134149 }
135150 var validateErr error
136151 // Validate the updateRun status to ensure the update can be continued and get the updating stage index and cluster indices.
152+ // For Stopped → Started transition, this will resume from where it left off.
137153 if updatingStageIndex , toBeUpdatedBindings , toBeDeletedBindings , validateErr = r .validate (ctx , updateRun ); validateErr != nil {
138154 // errStagedUpdatedAborted cannot be retried.
139155 if errors .Is (validateErr , errStagedUpdatedAborted ) {
@@ -151,28 +167,32 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
151167 }
152168
153169 // Execute the updateRun.
154- klog .V (2 ).InfoS ("Continue to execute the updateRun" , "updatingStageIndex" , updatingStageIndex , "updateRun" , runObjRef )
155- finished , waitTime , execErr := r .execute (ctx , updateRun , updatingStageIndex , toBeUpdatedBindings , toBeDeletedBindings )
156- if errors .Is (execErr , errStagedUpdatedAborted ) {
157- // errStagedUpdatedAborted cannot be retried.
158- return runtime.Result {}, r .recordUpdateRunFailed (ctx , updateRun , execErr .Error ())
159- }
170+ if state == placementv1beta1 .StateStarted {
171+ klog .V (2 ).InfoS ("Continue to execute the updateRun" , "updatingStageIndex" , updatingStageIndex , "updateRun" , runObjRef )
172+ finished , waitTime , execErr := r .execute (ctx , updateRun , updatingStageIndex , toBeUpdatedBindings , toBeDeletedBindings )
173+ if errors .Is (execErr , errStagedUpdatedAborted ) {
174+ // errStagedUpdatedAborted cannot be retried.
175+ return runtime.Result {}, r .recordUpdateRunFailed (ctx , updateRun , execErr .Error ())
176+ }
160177
161- if finished {
162- klog .V (2 ).InfoS ("The updateRun is completed" , "updateRun" , runObjRef )
163- return runtime.Result {}, r .recordUpdateRunSucceeded (ctx , updateRun )
164- }
178+ if finished {
179+ klog .V (2 ).InfoS ("The updateRun is completed" , "updateRun" , runObjRef )
180+ return runtime.Result {}, r .recordUpdateRunSucceeded (ctx , updateRun )
181+ }
165182
166- // The execution is not finished yet or it encounters a retriable error.
167- // We need to record the status and requeue.
168- if updateErr := r .recordUpdateRunStatus (ctx , updateRun ); updateErr != nil {
169- return runtime.Result {}, updateErr
170- }
171- klog .V (2 ).InfoS ("The updateRun is not finished yet" , "requeueWaitTime" , waitTime , "execErr" , execErr , "updateRun" , runObjRef )
172- if execErr != nil {
173- return runtime.Result {}, execErr
183+ // The execution is not finished yet or it encounters a retriable error.
184+ // We need to record the status and requeue.
185+ if updateErr := r .recordUpdateRunStatus (ctx , updateRun ); updateErr != nil {
186+ return runtime.Result {}, updateErr
187+ }
188+ klog .V (2 ).InfoS ("The updateRun is not finished yet" , "requeueWaitTime" , waitTime , "execErr" , execErr , "updateRun" , runObjRef )
189+ if execErr != nil {
190+ return runtime.Result {}, execErr
191+ }
192+ return runtime.Result {Requeue : true , RequeueAfter : waitTime }, nil
174193 }
175- return runtime.Result {Requeue : true , RequeueAfter : waitTime }, nil
194+ klog .V (2 ).InfoS ("The updateRun is not started, waiting to be started" , "state" , state , "updateRun" , runObjRef )
195+ return runtime.Result {}, nil
176196}
177197
178198// handleDelete handles the deletion of the updateRun object.
@@ -265,6 +285,50 @@ func (r *Reconciler) recordUpdateRunFailed(ctx context.Context, updateRun placem
265285 return nil
266286}
267287
288+ // recordUpdateRunPaused records the progressing condition as paused in the updateRun status.
289+ func (r * Reconciler ) recordUpdateRunPaused (ctx context.Context , updateRun placementv1beta1.UpdateRunObj ) error {
290+ updateRunStatus := updateRun .GetUpdateRunStatus ()
291+ meta .SetStatusCondition (& updateRunStatus .Conditions , metav1.Condition {
292+ Type : string (placementv1beta1 .StagedUpdateRunConditionProgressing ),
293+ Status : metav1 .ConditionFalse ,
294+ ObservedGeneration : updateRun .GetGeneration (),
295+ Reason : condition .UpdateRunPausedReason ,
296+ Message : "The update run is paused" ,
297+ })
298+ if updateErr := r .Client .Status ().Update (ctx , updateRun ); updateErr != nil {
299+ klog .ErrorS (updateErr , "Failed to update the updateRun status as paused" , "updateRun" , klog .KObj (updateRun ))
300+ // updateErr can be retried.
301+ return controller .NewUpdateIgnoreConflictError (updateErr )
302+ }
303+ return nil
304+ }
305+
306+ // recordUpdateRunAbandoned records the succeeded and progressing condition as abandoned in the updateRun status.
307+ func (r * Reconciler ) recordUpdateRunAbandoned (ctx context.Context , updateRun placementv1beta1.UpdateRunObj ) error {
308+ updateRunStatus := updateRun .GetUpdateRunStatus ()
309+ meta .SetStatusCondition (& updateRunStatus .Conditions , metav1.Condition {
310+ Type : string (placementv1beta1 .StagedUpdateRunConditionProgressing ),
311+ Status : metav1 .ConditionFalse ,
312+ ObservedGeneration : updateRun .GetGeneration (),
313+ Reason : condition .UpdateRunAbandonedReason ,
314+ Message : "The stages are aborted due to abandonment" ,
315+ })
316+ meta .SetStatusCondition (& updateRunStatus .Conditions , metav1.Condition {
317+ Type : string (placementv1beta1 .StagedUpdateRunConditionSucceeded ),
318+ Status : metav1 .ConditionFalse ,
319+ ObservedGeneration : updateRun .GetGeneration (),
320+ Reason : condition .UpdateRunAbandonedReason ,
321+ Message : "The update run has been abandoned" ,
322+ })
323+
324+ if updateErr := r .Client .Status ().Update (ctx , updateRun ); updateErr != nil {
325+ klog .ErrorS (updateErr , "Failed to update the updateRun status as failed" , "updateRun" , klog .KObj (updateRun ))
326+ // updateErr can be retried.
327+ return controller .NewUpdateIgnoreConflictError (updateErr )
328+ }
329+ return nil
330+ }
331+
268332// recordUpdateRunStatus records the updateRun status.
269333func (r * Reconciler ) recordUpdateRunStatus (ctx context.Context , updateRun placementv1beta1.UpdateRunObj ) error {
270334 if updateErr := r .Client .Status ().Update (ctx , updateRun ); updateErr != nil {
0 commit comments