-
Notifications
You must be signed in to change notification settings - Fork 25.4k
[ML] Make AssignmentPlan to consider only assigned allocations #132170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6a9779e
a8cc385
d537939
914381c
0221a2b
d2aa214
9445c01
cd87fc9
b46e15e
a48862e
4626d87
a2247bb
9521646
007afaa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -182,36 +182,39 @@ private AssignmentPlan computePlanAcrossAllNodes(List<AssignmentPlan> plans) { | |
List<AssignmentPlan.Deployment> planDeployments = preserveAllAllocations.modelsPreservingAllocations(); | ||
AssignmentPlan plan = new LinearProgrammingPlanSolver(planNodes, planDeployments).solvePlan(false); | ||
plan = preserveAllAllocations.mergePreservedAllocations(plan); | ||
return swapOriginalModelsInPlan(plan, allNodes, modelsAccountingPlans); | ||
return swapOriginalDeploymentsInPlan(plan, allNodes, modelsAccountingPlans); | ||
} | ||
|
||
private AssignmentPlan swapOriginalModelsInPlan( | ||
/** | ||
* The method is responsible for reconstructing an AssignmentPlan | ||
* by replacing the deployments and nodes in the given plan with their original counterparts. | ||
* This ensures that the final plan uses the original objects while preserving the assignments | ||
* and memory accounting from the input plan. | ||
* | ||
* @param plan AssignmentPlan to reconstruct with original models and nodes | ||
* @param allNodes List of all nodes in the system, used to find original nodes | ||
* @param planDeployments List of deployments in the plan, not the original deployments | ||
* @return final plan with original models and nodes swapped in | ||
*/ | ||
private AssignmentPlan swapOriginalDeploymentsInPlan( | ||
AssignmentPlan plan, | ||
List<Node> allNodes, | ||
List<AssignmentPlan.Deployment> planDeployments | ||
) { | ||
final Map<String, AssignmentPlan.Deployment> originalModelById = deployments.stream() | ||
final Map<String, AssignmentPlan.Deployment> originalDeploymentsById = deployments.stream() | ||
.collect(Collectors.toMap(AssignmentPlan.Deployment::deploymentId, Function.identity())); | ||
final Map<String, Node> originalNodeById = allNodes.stream().collect(Collectors.toMap(Node::id, Function.identity())); | ||
AssignmentPlan.Builder planBuilder = AssignmentPlan.builder(allNodes, deployments); | ||
for (AssignmentPlan.Deployment m : planDeployments) { | ||
AssignmentPlan.Deployment originalDeployment = originalModelById.get(m.deploymentId()); | ||
Map<Node, Integer> nodeAssignments = plan.assignments(m).orElse(Map.of()); | ||
AssignmentPlan.Builder finalPlanBuilder = AssignmentPlan.builder(allNodes, deployments); | ||
|
||
for (AssignmentPlan.Deployment planDeployment : planDeployments) { | ||
AssignmentPlan.Deployment originalDeployment = originalDeploymentsById.get(planDeployment.deploymentId()); | ||
Map<Node, Integer> nodeAssignments = plan.assignments(planDeployment).orElse(Map.of()); | ||
for (Map.Entry<Node, Integer> assignment : nodeAssignments.entrySet()) { | ||
Node originalNode = originalNodeById.get(assignment.getKey().id()); | ||
planBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); | ||
if (originalDeployment.currentAllocationsByNodeId().containsKey(originalNode.id())) { | ||
// TODO (#101612) requiredMemory should be calculated by the AssignmentPlan.Builder | ||
// As the node has all its available memory we need to manually account memory of models with | ||
// current allocations. | ||
long requiredMemory = originalDeployment.estimateMemoryUsageBytes( | ||
originalDeployment.currentAllocationsByNodeId().get(originalNode.id()) | ||
); | ||
planBuilder.accountMemory(m, originalNode, requiredMemory); | ||
} | ||
finalPlanBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now, calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please make |
||
} | ||
} | ||
return planBuilder.build(); | ||
return finalPlanBuilder.build(); | ||
} | ||
|
||
private Map<String, Map<String, Integer>> mergeAllocationsByNodeIdByDeploymentId(List<AssignmentPlan> plans) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now, calling
assignedModelToNode
is enough to correctly account for allocated memory.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a unit test for the
copyAssignments
method in TrainedModelAssignmentRebalancerTests