-
Notifications
You must be signed in to change notification settings - Fork 144
use the priority of kube-batch #209
base: master
Are you sure you want to change the base?
Changes from 2 commits
9b5cdab
e3b3ea0
aeb093e
09d6c1b
ba553a4
a50a725
6e17f2c
3683cfa
47fed48
a49a402
aad3b4c
2442fdc
6c24d6e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -69,6 +69,10 @@ type PyTorchJobSpec struct { | |||||
// "Worker": PyTorchReplicaSpec, | ||||||
// } | ||||||
PyTorchReplicaSpecs map[PyTorchReplicaType]*common.ReplicaSpec `json:"pytorchReplicaSpecs"` | ||||||
|
||||||
// Add the attribute used to determine the job's priority | ||||||
//add PriorityClassName | ||||||
PriorityClassName string `json:"priorityClassName,omitempty"` | ||||||
|
PriorityClassName string `json:"priorityClassName,omitempty"` | |
PriorityClassName *string `json:"priorityClassName,omitempty"` |
Since it is optional, we can define it as a pointer.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -437,7 +437,9 @@ func (pc *PyTorchController) reconcilePyTorchJobs(job *pyv1.PyTorchJob) error { | |
|
||
if pc.Config.EnableGangScheduling { | ||
minAvailableReplicas := getTotalReplicas(job) | ||
_, err := pc.SyncPodGroup(job, minAvailableReplicas) | ||
priorityClassName:=getPriorityClassName(job) | ||
//_, err := pc.SyncPodGroup(job, minAvailableReplicas) | ||
_, err := pc.SyncPodGroupTest(job, minAvailableReplicas,priorityClassName) | ||
|
||
if err != nil { | ||
logger.Warnf("Sync PodGroup %v: %v", job.Name, err) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,6 +69,7 @@ func (pc *PyTorchController) updateStatusSingle(job *pyv1.PyTorchJob, rtype pyv1 | |
|
||
// Expect to have `replicas - succeeded` pods alive. | ||
commonType := common.ReplicaType(rtype) | ||
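// expected is the success indicator: when it equals 0, the number of succeeded pods equals the replica count and the replica set is considered successful | ||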
|
||
expected := replicas - int(job.Status.ReplicaStatuses[commonType].Succeeded) | ||
running := int(job.Status.ReplicaStatuses[commonType].Active) | ||
failed := int(job.Status.ReplicaStatuses[commonType].Failed) | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use English here.