Skip to content

Commit 6a68b64

Browse files
committed
Start/Stop API Implementation
Signed-off-by: Britania Rodriguez Reyes <[email protected]>
1 parent d7e9d01 commit 6a68b64

File tree

11 files changed

+1427
-203
lines changed

11 files changed

+1427
-203
lines changed

pkg/controllers/updaterun/controller.go

Lines changed: 85 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,26 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
104104
// Emit the update run status metric based on status conditions in the updateRun.
105105
defer emitUpdateRunStatusMetric(updateRun)
106106

107+
// Early check for abandoned state - this is a terminal state, no initialization needed.
108+
state := updateRun.GetUpdateRunSpec().State
109+
if state == placementv1beta1.StateAbandoned {
110+
klog.V(2).InfoS("The updateRun is abandoned, terminating", "state", state, "updateRun", runObjRef)
111+
return runtime.Result{}, r.recordUpdateRunAbandoned(ctx, updateRun)
112+
} else if state == placementv1beta1.StateStopped { // Early check for stopped state - pause the update run if needed.
113+
klog.V(2).InfoS("The updateRun is stopped, waiting to resume", "state", state, "updateRun", runObjRef)
114+
return runtime.Result{}, r.recordUpdateRunPaused(ctx, updateRun)
115+
}
116+
107117
var updatingStageIndex int
108118
var toBeUpdatedBindings, toBeDeletedBindings []placementv1beta1.BindingObj
109119
updateRunStatus := updateRun.GetUpdateRunStatus()
110120
initCond := meta.FindStatusCondition(updateRunStatus.Conditions, string(placementv1beta1.StagedUpdateRunConditionInitialized))
111-
if !condition.IsConditionStatusTrue(initCond, updateRun.GetGeneration()) {
121+
// Check if initialized regardless of generation.
122+
// The updateRun spec fields are immutable except for the state field. When the state changes,
123+
// the update run generation increments, but we don't need to reinitialize since initialization is a one-time setup.
124+
isInitialized := initCond != nil && initCond.Status == metav1.ConditionTrue
125+
if !isInitialized {
126+
// Check if initialization failed for the current generation.
112127
if condition.IsConditionStatusFalse(initCond, updateRun.GetGeneration()) {
113128
klog.V(2).InfoS("The updateRun has failed to initialize", "errorMsg", initCond.Message, "updateRun", runObjRef)
114129
return runtime.Result{}, nil
@@ -122,7 +137,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
122137
}
123138
return runtime.Result{}, initErr
124139
}
125-
updatingStageIndex = 0 // start from the first stage.
140+
updatingStageIndex = 0 // start from the first stage (typically for NotStarted or Started states).
126141
klog.V(2).InfoS("Initialized the updateRun", "updateRun", runObjRef)
127142
} else {
128143
klog.V(2).InfoS("The updateRun is initialized", "updateRun", runObjRef)
@@ -134,6 +149,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
134149
}
135150
var validateErr error
136151
// Validate the updateRun status to ensure the update can be continued and get the updating stage index and cluster indices.
152+
// For Stopped → Started transition, this will resume from where it left off.
137153
if updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings, validateErr = r.validate(ctx, updateRun); validateErr != nil {
138154
// errStagedUpdatedAborted cannot be retried.
139155
if errors.Is(validateErr, errStagedUpdatedAborted) {
@@ -151,28 +167,32 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
151167
}
152168

153169
// Execute the updateRun.
154-
klog.V(2).InfoS("Continue to execute the updateRun", "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef)
155-
finished, waitTime, execErr := r.execute(ctx, updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings)
156-
if errors.Is(execErr, errStagedUpdatedAborted) {
157-
// errStagedUpdatedAborted cannot be retried.
158-
return runtime.Result{}, r.recordUpdateRunFailed(ctx, updateRun, execErr.Error())
159-
}
170+
if state == placementv1beta1.StateStarted {
171+
klog.V(2).InfoS("Continue to execute the updateRun", "updatingStageIndex", updatingStageIndex, "updateRun", runObjRef)
172+
finished, waitTime, execErr := r.execute(ctx, updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings)
173+
if errors.Is(execErr, errStagedUpdatedAborted) {
174+
// errStagedUpdatedAborted cannot be retried.
175+
return runtime.Result{}, r.recordUpdateRunFailed(ctx, updateRun, execErr.Error())
176+
}
160177

161-
if finished {
162-
klog.V(2).InfoS("The updateRun is completed", "updateRun", runObjRef)
163-
return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, updateRun)
164-
}
178+
if finished {
179+
klog.V(2).InfoS("The updateRun is completed", "updateRun", runObjRef)
180+
return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, updateRun)
181+
}
165182

166-
// The execution is not finished yet or it encounters a retriable error.
167-
// We need to record the status and requeue.
168-
if updateErr := r.recordUpdateRunStatus(ctx, updateRun); updateErr != nil {
169-
return runtime.Result{}, updateErr
170-
}
171-
klog.V(2).InfoS("The updateRun is not finished yet", "requeueWaitTime", waitTime, "execErr", execErr, "updateRun", runObjRef)
172-
if execErr != nil {
173-
return runtime.Result{}, execErr
183+
// The execution is not finished yet or it encounters a retriable error.
184+
// We need to record the status and requeue.
185+
if updateErr := r.recordUpdateRunStatus(ctx, updateRun); updateErr != nil {
186+
return runtime.Result{}, updateErr
187+
}
188+
klog.V(2).InfoS("The updateRun is not finished yet", "requeueWaitTime", waitTime, "execErr", execErr, "updateRun", runObjRef)
189+
if execErr != nil {
190+
return runtime.Result{}, execErr
191+
}
192+
return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil
174193
}
175-
return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil
194+
klog.V(2).InfoS("The updateRun is not started, waiting to be started", "state", state, "updateRun", runObjRef)
195+
return runtime.Result{}, nil
176196
}
177197

178198
// handleDelete handles the deletion of the updateRun object.
@@ -265,6 +285,50 @@ func (r *Reconciler) recordUpdateRunFailed(ctx context.Context, updateRun placem
265285
return nil
266286
}
267287

288+
// recordUpdateRunPaused records the progressing condition as paused in the updateRun status.
289+
func (r *Reconciler) recordUpdateRunPaused(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) error {
290+
updateRunStatus := updateRun.GetUpdateRunStatus()
291+
meta.SetStatusCondition(&updateRunStatus.Conditions, metav1.Condition{
292+
Type: string(placementv1beta1.StagedUpdateRunConditionProgressing),
293+
Status: metav1.ConditionFalse,
294+
ObservedGeneration: updateRun.GetGeneration(),
295+
Reason: condition.UpdateRunPausedReason,
296+
Message: "The update run is paused",
297+
})
298+
if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil {
299+
klog.ErrorS(updateErr, "Failed to update the updateRun status as paused", "updateRun", klog.KObj(updateRun))
300+
// updateErr can be retried.
301+
return controller.NewUpdateIgnoreConflictError(updateErr)
302+
}
303+
return nil
304+
}
305+
306+
// recordUpdateRunAbandoned records the succeeded and progressing conditions as abandoned in the updateRun status.
307+
func (r *Reconciler) recordUpdateRunAbandoned(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) error {
308+
updateRunStatus := updateRun.GetUpdateRunStatus()
309+
meta.SetStatusCondition(&updateRunStatus.Conditions, metav1.Condition{
310+
Type: string(placementv1beta1.StagedUpdateRunConditionProgressing),
311+
Status: metav1.ConditionFalse,
312+
ObservedGeneration: updateRun.GetGeneration(),
313+
Reason: condition.UpdateRunAbandonedReason,
314+
Message: "The stages are aborted due to abandonment",
315+
})
316+
meta.SetStatusCondition(&updateRunStatus.Conditions, metav1.Condition{
317+
Type: string(placementv1beta1.StagedUpdateRunConditionSucceeded),
318+
Status: metav1.ConditionFalse,
319+
ObservedGeneration: updateRun.GetGeneration(),
320+
Reason: condition.UpdateRunAbandonedReason,
321+
Message: "The update run has been abandoned",
322+
})
323+
324+
if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil {
325+
klog.ErrorS(updateErr, "Failed to update the updateRun status as failed", "updateRun", klog.KObj(updateRun))
326+
// updateErr can be retried.
327+
return controller.NewUpdateIgnoreConflictError(updateErr)
328+
}
329+
return nil
330+
}
331+
268332
// recordUpdateRunStatus records the updateRun status.
269333
func (r *Reconciler) recordUpdateRunStatus(ctx context.Context, updateRun placementv1beta1.UpdateRunObj) error {
270334
if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil {

pkg/controllers/updaterun/controller_integration_test.go

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,16 @@ func generateMetricsLabels(
272272
}
273273
}
274274

275+
func generateInitializationSucceededMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
276+
return &prometheusclientmodel.Metric{
277+
Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionInitialized),
278+
string(metav1.ConditionTrue), condition.UpdateRunInitializeSucceededReason),
279+
Gauge: &prometheusclientmodel.Gauge{
280+
Value: ptr.To(float64(time.Now().UnixNano()) / 1e9),
281+
},
282+
}
283+
}
284+
275285
func generateInitializationFailedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
276286
return &prometheusclientmodel.Metric{
277287
Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionInitialized),
@@ -312,6 +322,26 @@ func generateStuckMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *pr
312322
}
313323
}
314324

325+
func generatePausedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
326+
return &prometheusclientmodel.Metric{
327+
Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionProgressing),
328+
string(metav1.ConditionFalse), condition.UpdateRunPausedReason),
329+
Gauge: &prometheusclientmodel.Gauge{
330+
Value: ptr.To(float64(time.Now().UnixNano()) / 1e9),
331+
},
332+
}
333+
}
334+
335+
func generateAbandonedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
336+
return &prometheusclientmodel.Metric{
337+
Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionSucceeded),
338+
string(metav1.ConditionFalse), condition.UpdateRunAbandonedReason),
339+
Gauge: &prometheusclientmodel.Gauge{
340+
Value: ptr.To(float64(time.Now().UnixNano()) / 1e9),
341+
},
342+
}
343+
}
344+
315345
func generateFailedMetric(updateRun *placementv1beta1.ClusterStagedUpdateRun) *prometheusclientmodel.Metric {
316346
return &prometheusclientmodel.Metric{
317347
Label: generateMetricsLabels(updateRun, string(placementv1beta1.StagedUpdateRunConditionSucceeded),
@@ -341,6 +371,7 @@ func generateTestClusterStagedUpdateRun() *placementv1beta1.ClusterStagedUpdateR
341371
PlacementName: testCRPName,
342372
ResourceSnapshotIndex: testResourceSnapshotIndex,
343373
StagedUpdateStrategyName: testUpdateStrategyName,
374+
State: placementv1beta1.StateStarted,
344375
},
345376
}
346377
}
@@ -796,23 +827,14 @@ func generateFalseCondition(obj client.Object, condType any) metav1.Condition {
796827
}
797828
}
798829

799-
func generateFalseProgressingCondition(obj client.Object, condType any, succeeded bool) metav1.Condition {
830+
func generateFalseProgressingCondition(obj client.Object, condType any, reason string) metav1.Condition {
831+
falseCond := generateFalseCondition(obj, condType)
832+
falseCond.Reason = reason
833+
return falseCond
834+
}
835+
836+
func generateFalseSucceededCondition(obj client.Object, condType any, reason string) metav1.Condition {
800837
falseCond := generateFalseCondition(obj, condType)
801-
reason := ""
802-
switch condType {
803-
case placementv1beta1.StagedUpdateRunConditionProgressing:
804-
if succeeded {
805-
reason = condition.UpdateRunSucceededReason
806-
} else {
807-
reason = condition.UpdateRunFailedReason
808-
}
809-
case placementv1beta1.StageUpdatingConditionProgressing:
810-
if succeeded {
811-
reason = condition.StageUpdatingSucceededReason
812-
} else {
813-
reason = condition.StageUpdatingFailedReason
814-
}
815-
}
816838
falseCond.Reason = reason
817839
return falseCond
818840
}

0 commit comments

Comments
 (0)