@@ -33,6 +33,7 @@ import (
 	"golang.org/x/crypto/ssh"
 	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/equality"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -50,6 +51,7 @@ import (
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/klog"
+	"k8s.io/utils/pointer"
 	podgroupv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
 	volcanoclient "volcano.sh/apis/pkg/client/clientset/versioned"
 	podgroupsinformer "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1"
@@ -493,7 +495,7 @@ func (c *MPIJobController) syncHandler(key string) error {
 
 	if len(mpiJob.Status.Conditions) == 0 {
 		msg := fmt.Sprintf("MPIJob %s/%s is created.", mpiJob.Namespace, mpiJob.Name)
-		updateMPIJobConditions(mpiJob, common.JobCreated, mpiJobCreatedReason, msg)
+		updateMPIJobConditions(mpiJob, common.JobCreated, v1.ConditionTrue, mpiJobCreatedReason, msg)
 		c.recorder.Event(mpiJob, corev1.EventTypeNormal, "MPIJobCreated", msg)
 		mpiJobsCreatedCount.Inc()
 	}
@@ -503,24 +505,13 @@ func (c *MPIJobController) syncHandler(key string) error {
 	// cleanup and stop retrying the MPIJob.
 	if isFinished(mpiJob.Status) && mpiJob.Status.CompletionTime != nil {
 		if isCleanUpPods(mpiJob.Spec.RunPolicy.CleanPodPolicy) {
-			// set worker StatefulSet Replicas to 0.
-			if err := c.deleteWorkerPods(mpiJob); err != nil {
-				return err
-			}
-			initializeMPIJobStatuses(mpiJob, kubeflow.MPIReplicaTypeWorker)
-			if c.gangSchedulerName != "" {
-				if err := c.deletePodGroups(mpiJob); err != nil {
-					return err
-				}
-			}
-			mpiJob.Status.ReplicaStatuses[common.ReplicaType(kubeflow.MPIReplicaTypeWorker)].Active = 0
-			return c.updateStatusHandler(mpiJob)
+			return cleanUpWorkerPods(mpiJob, c)
 		}
 		return nil
 	}
 
 	// first set StartTime.
-	if mpiJob.Status.StartTime == nil {
+	if mpiJob.Status.StartTime == nil && !isMPIJobSuspended(mpiJob) {
 		now := metav1.Now()
 		mpiJob.Status.StartTime = &now
 	}
@@ -532,9 +523,10 @@ func (c *MPIJobController) syncHandler(key string) error {
 	}
 
 	var worker []*corev1.Pod
-	// We're done if the launcher either succeeded or failed.
-	done := launcher != nil && isJobFinished(launcher)
-	if !done {
+	// We're done if the launcher either succeeded or failed. We also skip
+	// creation of the auxiliary objects if the MPIJob is suspended.
+	running := !(launcher != nil && isJobFinished(launcher)) && !isMPIJobSuspended(mpiJob)
+	if running {
 		_, err := c.getOrCreateService(mpiJob, newWorkersService(mpiJob))
 		if err != nil {
 			return fmt.Errorf("getting or creating Service to front workers: %w", err)
@@ -585,9 +577,40 @@ func (c *MPIJobController) syncHandler(key string) error {
 		return err
 	}
 
+	if launcher != nil {
+		if isMPIJobSuspended(mpiJob) != isJobSuspended(launcher) {
+			// align the suspension state of launcher with the MPIJob
+			launcher.Spec.Suspend = pointer.Bool(isMPIJobSuspended(mpiJob))
+			if _, err := c.kubeClient.BatchV1().Jobs(namespace).Update(context.TODO(), launcher, metav1.UpdateOptions{}); err != nil {
+				return err
+			}
+		}
+	}
+
+	// cleanup the running worker pods if the MPI job is suspended
+	if isMPIJobSuspended(mpiJob) {
+		if err := cleanUpWorkerPods(mpiJob, c); err != nil {
+			return err
+		}
+	}
 	return nil
 }
 
+func cleanUpWorkerPods(mpiJob *kubeflow.MPIJob, c *MPIJobController) error {
+	// set worker StatefulSet Replicas to 0.
+	if err := c.deleteWorkerPods(mpiJob); err != nil {
+		return err
+	}
+	initializeMPIJobStatuses(mpiJob, kubeflow.MPIReplicaTypeWorker)
+	if c.gangSchedulerName != "" {
+		if err := c.deletePodGroups(mpiJob); err != nil {
+			return err
+		}
+	}
+	mpiJob.Status.ReplicaStatuses[common.ReplicaType(kubeflow.MPIReplicaTypeWorker)].Active = 0
+	return c.updateStatusHandler(mpiJob)
+}
+
 // getLauncherJob gets the launcher Job controlled by this MPIJob.
 func (c *MPIJobController) getLauncherJob(mpiJob *kubeflow.MPIJob) (*batchv1.Job, error) {
 	launcher, err := c.jobLister.Jobs(mpiJob.Namespace).Get(mpiJob.Name + launcherSuffix)
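
A note on usage (not part of the diff): suspension is driven entirely by the MPIJob's RunPolicy.Suspend field, so a user or an external queueing controller pauses and resumes a job by flipping that single field; the controller code above then suspends the launcher Job and calls cleanUpWorkerPods. A rough sketch with the generated clientset follows; the import path, the KubeflowV2beta1 accessor, and the namespace/name are assumptions for illustration only.

package main

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/utils/pointer"

	clientset "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned" // assumed import path
)

func main() {
	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client := clientset.NewForConfigOrDie(cfg)

	// Suspend the (hypothetical) MPIJob "pi" in "default"; setting Suspend back
	// to false later resumes it, and the controller recreates the worker pods.
	ctx := context.TODO()
	mpiJob, err := client.KubeflowV2beta1().MPIJobs("default").Get(ctx, "pi", metav1.GetOptions{})
	if err != nil {
		panic(err)
	}
	mpiJob.Spec.RunPolicy.Suspend = pointer.Bool(true)
	if _, err := client.KubeflowV2beta1().MPIJobs("default").Update(ctx, mpiJob, metav1.UpdateOptions{}); err != nil {
		panic(err)
	}
}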
@@ -857,6 +880,14 @@ func (c *MPIJobController) getOrCreateWorker(mpiJob *kubeflow.MPIJob) ([]*corev1
 	return workerPods, nil
 }
 
+func isMPIJobSuspended(mpiJob *kubeflow.MPIJob) bool {
+	return pointer.BoolDeref(mpiJob.Spec.RunPolicy.Suspend, false)
+}
+
+func isJobSuspended(job *batchv1.Job) bool {
+	return pointer.BoolDeref(job.Spec.Suspend, false)
+}
+
 func (c *MPIJobController) deleteWorkerPods(mpiJob *kubeflow.MPIJob) error {
 	var (
 		workerPrefix = mpiJob.Name + workerSuffix
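
Both helpers lean on k8s.io/utils/pointer to treat a nil Suspend pointer as the supplied default, so an MPIJob or launcher Job that never sets the field behaves as not suspended. A minimal standalone illustration of that nil handling (not part of the diff):

package main

import (
	"fmt"

	"k8s.io/utils/pointer"
)

func main() {
	var unset *bool
	fmt.Println(pointer.BoolDeref(unset, false))              // false: an unset Suspend field means "not suspended"
	fmt.Println(pointer.BoolDeref(pointer.Bool(true), false)) // true: explicitly suspended
}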
@@ -905,6 +936,19 @@ func (c *MPIJobController) updateMPIJobStatus(mpiJob *kubeflow.MPIJob, launcher
 	if err != nil {
 		return fmt.Errorf("checking launcher pods running: %w", err)
 	}
+	if isMPIJobSuspended(mpiJob) {
+		// it is suspended now
+		if updateMPIJobConditions(mpiJob, kubeflow.JobSuspended, v1.ConditionTrue, "MPIJobSuspended", "MPIJob suspended") {
+			c.recorder.Event(mpiJob, corev1.EventTypeNormal, "Suspended", "MPIJob suspended")
+		}
+	} else if getCondition(mpiJob.Status, kubeflow.JobSuspended) != nil {
+		// it is not suspended now, consider resumed if the condition was set before
+		if updateMPIJobConditions(mpiJob, kubeflow.JobSuspended, v1.ConditionFalse, "MPIJobResumed", "MPIJob resumed") {
+			c.recorder.Event(mpiJob, corev1.EventTypeNormal, "Resumed", "MPIJob resumed")
+			now := metav1.NewTime(time.Now())
+			mpiJob.Status.StartTime = &now
+		}
+	}
 	// Job.status.Active accounts for Pending and Running pods. Count running pods
 	// from the lister instead.
 	launcherPodsCnt := countRunningPods(launcherPods)
@@ -919,7 +963,7 @@ func (c *MPIJobController) updateMPIJobStatus(mpiJob *kubeflow.MPIJob, launcher
 		if mpiJob.Status.CompletionTime == nil {
 			mpiJob.Status.CompletionTime = launcher.Status.CompletionTime
 		}
-		updateMPIJobConditions(mpiJob, common.JobSucceeded, mpiJobSucceededReason, msg)
+		updateMPIJobConditions(mpiJob, common.JobSucceeded, v1.ConditionTrue, mpiJobSucceededReason, msg)
 		mpiJobsSuccessCount.Inc()
 	} else if isJobFailed(launcher) {
 		c.updateMPIJobFailedStatus(mpiJob, launcher, launcherPods)
@@ -953,13 +997,13 @@ func (c *MPIJobController) updateMPIJobStatus(mpiJob *kubeflow.MPIJob, launcher
 	if evict > 0 {
 		msg := fmt.Sprintf("%d/%d workers are evicted", evict, len(worker))
 		klog.Infof("MPIJob <%s/%s>: %v", mpiJob.Namespace, mpiJob.Name, msg)
-		updateMPIJobConditions(mpiJob, common.JobFailed, mpiJobEvict, msg)
+		updateMPIJobConditions(mpiJob, common.JobFailed, v1.ConditionTrue, mpiJobEvict, msg)
 		c.recorder.Event(mpiJob, corev1.EventTypeWarning, mpiJobEvict, msg)
 	}
 
 	if launcher != nil && launcherPodsCnt >= 1 && running == len(worker) {
 		msg := fmt.Sprintf("MPIJob %s/%s is running.", mpiJob.Namespace, mpiJob.Name)
-		updateMPIJobConditions(mpiJob, common.JobRunning, mpiJobRunningReason, msg)
+		updateMPIJobConditions(mpiJob, common.JobRunning, v1.ConditionTrue, mpiJobRunningReason, msg)
 		c.recorder.Eventf(mpiJob, corev1.EventTypeNormal, "MPIJobRunning", "MPIJob %s/%s is running", mpiJob.Namespace, mpiJob.Name)
 	}
 
@@ -999,7 +1043,7 @@ func (c *MPIJobController) updateMPIJobFailedStatus(mpiJob *kubeflow.MPIJob, lau
 		now := metav1.Now()
 		mpiJob.Status.CompletionTime = &now
 	}
-	updateMPIJobConditions(mpiJob, common.JobFailed, reason, msg)
+	updateMPIJobConditions(mpiJob, common.JobFailed, v1.ConditionTrue, reason, msg)
 	mpiJobsFailureCount.Inc()
 }
 
@@ -1304,7 +1348,7 @@ func (c *MPIJobController) newWorker(mpiJob *kubeflow.MPIJob, index int) *corev1
 }
 
 func (c *MPIJobController) newLauncherJob(mpiJob *kubeflow.MPIJob) *batchv1.Job {
-	return &batchv1.Job{
+	job := &batchv1.Job{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      mpiJob.Name + launcherSuffix,
 			Namespace: mpiJob.Namespace,
@@ -1322,6 +1366,10 @@ func (c *MPIJobController) newLauncherJob(mpiJob *kubeflow.MPIJob) *batchv1.Job
 			Template: c.newLauncherPodTemplate(mpiJob),
 		},
 	}
+	if isMPIJobSuspended(mpiJob) {
+		job.Spec.Suspend = pointer.Bool(true)
+	}
+	return job
 }
 
 // newLauncherPodTemplate creates a new launcher Job for an MPIJob resource. It also sets
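
For context (not part of the diff): because the launcher is created with Spec.Suspend already set when the MPIJob starts out suspended, the built-in Job controller schedules no launcher pod until the MPIJob is resumed. A small approximation of the resulting object, assuming only the fields relevant to suspension:

package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	"k8s.io/utils/pointer"
)

func main() {
	// Rough shape of what newLauncherJob returns for a suspended MPIJob; the
	// real object also carries ObjectMeta, the backoff limit, and the pod template.
	job := &batchv1.Job{
		Spec: batchv1.JobSpec{
			Suspend: pointer.Bool(true),
		},
	}
	fmt.Println(pointer.BoolDeref(job.Spec.Suspend, false)) // true: no launcher pod until resumed
}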