diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index b59c3e99f0741..f523b4b086f35 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -130,23 +130,15 @@ private static AssignmentPlan mergePlans( return finalPlanBuilder.build(); } - private static void copyAssignments( - AssignmentPlan source, - AssignmentPlan.Builder dest, - Map originalNodeById - ) { - for (AssignmentPlan.Deployment m : source.deployments()) { - Map nodeAssignments = source.assignments(m).orElse(Map.of()); - for (Map.Entry assignment : nodeAssignments.entrySet()) { - AssignmentPlan.Node originalNode = originalNodeById.get(assignment.getKey().id()); - dest.assignModelToNode(m, originalNode, assignment.getValue()); - if (m.currentAllocationsByNodeId().containsKey(originalNode.id())) { - // TODO (#101612) requiredMemory should be calculated by the AssignmentPlan.Builder - // As the node has all its available memory we need to manually account memory of models with - // current allocations. - long requiredMemory = m.estimateMemoryUsageBytes(m.currentAllocationsByNodeId().get(originalNode.id())); - dest.accountMemory(m, originalNode, requiredMemory); - } + /** + * Transfers assignments from the source AssignmentPlan to the destination AssignmentPlan.Builder. + */ + static void copyAssignments(AssignmentPlan source, AssignmentPlan.Builder dest, Map originalNodeById) { + for (AssignmentPlan.Deployment deployment : source.deployments()) { + Map sourceNodeAssignments = source.assignments(deployment).orElse(Map.of()); + for (Map.Entry sourceAssignment : sourceNodeAssignments.entrySet()) { + AssignmentPlan.Node node = originalNodeById.get(sourceAssignment.getKey().id()); + dest.assignModelToNode(deployment, node, sourceAssignment.getValue()); } } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java index a8eb3bea499a1..a90a8cb9d5262 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java @@ -223,23 +223,43 @@ public int compareTo(AssignmentPlan o) { return Comparator.comparing(AssignmentPlan::computeQuality).compare(this, o); } + /** + * Checks whether all deployments in the current {@link AssignmentPlan} have at least as many + * allocations as currently assigned. + */ public boolean satisfiesCurrentAssignments() { return deployments().stream().allMatch(this::isSatisfyingCurrentAssignmentsForModel); } + /** + * Checks whether the current assignments for a given {@link Deployment} meet its allocation requirements. + * + * It ensures that the total number of allocations assigned to the deployment across all nodes is + * at least equal to the deployment's current assigned allocations. + */ private boolean isSatisfyingCurrentAssignmentsForModel(Deployment m) { if (m.currentAllocationsByNodeId().isEmpty()) { return true; } Map nodeAssignments = assignments.get(m); - int currentAllocations = nodeAssignments.values().stream().mapToInt(Integer::intValue).sum(); - return currentAllocations >= m.getCurrentAssignedAllocations(); + int inPlanAssignedAllocations = nodeAssignments.values().stream().mapToInt(Integer::intValue).sum(); + return inPlanAssignedAllocations >= m.getCurrentAssignedAllocations(); } - public boolean satisfiesAllocations(Deployment m) { - return remainingModelAllocations.getOrDefault(m, 0) == 0; + /** + * Checks if the current assignments satisfy the deployment's allocation requirements. + * @param deployment the deployment to check + * @return true if the current assignments satisfy the deployment's allocation requirements, false otherwise + */ + public boolean satisfiesAllocations(Deployment deployment) { + return remainingModelAllocations.getOrDefault(deployment, 0) == 0; } + /** + * Checks if the current assignments satisfy all deployments' allocation requirements. This means that + * each deployment has no remaining allocations left to assign. + * @return true if the current assignments satisfy the deployments' allocation requirements, false otherwise + */ public boolean satisfiesAllModels() { return deployments().stream().allMatch(this::satisfiesAllocations); } @@ -424,8 +444,7 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio if (allocations <= 0) { return this; } - if (/*isAlreadyAssigned(deployment, node) == false - &&*/ requiredMemory > remainingNodeMemory.get(node)) { + if (requiredMemory > remainingNodeMemory.get(node)) { throw new IllegalArgumentException( "not enough memory on node [" + node.id() @@ -450,7 +469,7 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio ); } - assignments.get(deployment).compute(node, (n, remAllocations) -> remAllocations + allocations); + assignments.get(deployment).compute(node, (n, assignedAllocations) -> assignedAllocations + allocations); accountMemory(deployment, node, requiredMemory); if (deployment.priority == Priority.NORMAL) { @@ -461,23 +480,10 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio } private int getAssignedAllocations(Deployment deployment, Node node) { - int currentAllocations = getCurrentAllocations(deployment, node); - int assignmentAllocations = assignments.get(deployment).get(node); - return currentAllocations + assignmentAllocations; - } - - private static int getCurrentAllocations(Deployment m, Node n) { - return m.currentAllocationsByNodeId.containsKey(n.id()) ? m.currentAllocationsByNodeId.get(n.id()) : 0; - } - - public void accountMemory(Deployment m, Node n) { - // TODO (#101612) remove or refactor unused method - long requiredMemory = getDeploymentMemoryRequirement(m, n, getCurrentAllocations(m, n)); - accountMemory(m, n, requiredMemory); + return assignments.get(deployment).get(node); } public void accountMemory(Deployment m, Node n, long requiredMemory) { - // TODO (#101612) computation of required memory should be done internally remainingNodeMemory.computeIfPresent(n, (k, v) -> v - requiredMemory); if (remainingNodeMemory.containsKey(n) && remainingNodeMemory.get(n) < 0) { throw new IllegalArgumentException("not enough memory on node [" + n.id() + "] to assign model [" + m.deploymentId() + "]"); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanner.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanner.java index e1b6fe0de0b9e..bb7998035ff46 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanner.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanner.java @@ -60,13 +60,26 @@ public AssignmentPlan computePlan() { return computePlan(true); } - public AssignmentPlan computePlan(boolean tryAssigningPreviouslyAssignedModels) { + /** + * Computes an {@link AssignmentPlan} for the given nodes and deployments. + * If {@code tryAssigningAllPreviouslyAllocatedModels} is true, then the plan will + * attempt to assign at least one allocation to previously assigned models. + * Otherwise, it will only ensure that deployments assigned to existing nodes will preserve at least one allocation + * + * @param tryAssigningAllPreviouslyAllocatedModels whether to do the best effort assigning previously assigned models somewhere + * with at least one allocation + * @return the computed assignment plan + */ + public AssignmentPlan computePlan(boolean tryAssigningAllPreviouslyAllocatedModels) { logger.debug(() -> format("Computing plan for nodes = %s; deployments = %s", nodes, deployments)); AssignmentPlan bestPlan; AssignmentPlan planSatisfyingCurrentAssignments = solveSatisfyingCurrentAssignments(); logger.debug(() -> "Plan satisfying current assignments =\n" + planSatisfyingCurrentAssignments.prettyPrint()); - if (planSatisfyingCurrentAssignments.arePreviouslyAssignedModelsAssigned() == false && tryAssigningPreviouslyAssignedModels) { + if (planSatisfyingCurrentAssignments.arePreviouslyAssignedModelsAssigned() || tryAssigningAllPreviouslyAllocatedModels == false) { + bestPlan = planSatisfyingCurrentAssignments; + } else { + // try to reuse any deployment that would otherwise drop to zero allocations AssignmentPlan planAllocatingAtLeastOnceModelsThatWerePreviouslyAllocated = solveAllocatingAtLeastOnceModelsThatWerePreviouslyAllocated(); logger.debug( @@ -82,8 +95,6 @@ public AssignmentPlan computePlan(boolean tryAssigningPreviouslyAssignedModels) ? planSatisfyingCurrentAssignments : planAllocatingAtLeastOnceModelsThatWerePreviouslyAllocated; } - } else { - bestPlan = planSatisfyingCurrentAssignments; } logger.debug(() -> "Best plan =\n" + bestPlan.prettyPrint()); @@ -91,19 +102,30 @@ public AssignmentPlan computePlan(boolean tryAssigningPreviouslyAssignedModels) return bestPlan; } + /** + * Computes the best assignment plan from two strategies: + * 1. Preserving one allocation on current assignments, which is the most flexible + * 2. Preserving all allocations on current assignments, which is more conservative + * @return the best assignment plan + */ private AssignmentPlan solveSatisfyingCurrentAssignments() { AssignmentPlan bestPlan; // First solve preserving one allocation per assignment because that is most flexible AssignmentPlan planKeepingOneAllocationOnCurrentAssignments = solveKeepingOneAllocationOnCurrentAssignments(); - if (planKeepingOneAllocationOnCurrentAssignments.satisfiesCurrentAssignments() == false) { + + if (planKeepingOneAllocationOnCurrentAssignments.satisfiesAllModels()) { + // If the plan satisfies all models, then we can use it as is + bestPlan = planKeepingOneAllocationOnCurrentAssignments; + } else if (planKeepingOneAllocationOnCurrentAssignments.satisfiesCurrentAssignments() == false) { + // If in the new assignment plan, some deployments have fewer allocations than in the current assignments, + // try explicitly preserving all allocations on current assignments. bestPlan = solvePreservingAllAllocationsOnCurrentAssignments(); - } else if (planKeepingOneAllocationOnCurrentAssignments.satisfiesAllModels() == false) { + } else { + // Choose the best strategy according to {@link AssignmentPlan#computeQuality(AssignmentPlan)} AssignmentPlan planKeepingAllAllocationsOnCurrentAssignments = solvePreservingAllAllocationsOnCurrentAssignments(); bestPlan = planKeepingAllAllocationsOnCurrentAssignments.compareTo(planKeepingOneAllocationOnCurrentAssignments) >= 0 ? planKeepingAllAllocationsOnCurrentAssignments : planKeepingOneAllocationOnCurrentAssignments; - } else { - bestPlan = planKeepingOneAllocationOnCurrentAssignments; } return bestPlan; } @@ -120,7 +142,7 @@ private AssignmentPlan solveAllocatingAtLeastOnceModelsThatWerePreviouslyAllocat 1, m.threadsPerAllocation(), // don't rely on the current allocation - new HashMap<>(), + Map.of(), m.maxAssignedAllocations(), m.getAdaptiveAllocationsSettings(), m.perDeploymentMemoryBytes(), diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java index 50684cfc6a416..64cd40fdc537d 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java @@ -182,36 +182,39 @@ private AssignmentPlan computePlanAcrossAllNodes(List plans) { List planDeployments = preserveAllAllocations.modelsPreservingAllocations(); AssignmentPlan plan = new LinearProgrammingPlanSolver(planNodes, planDeployments).solvePlan(false); plan = preserveAllAllocations.mergePreservedAllocations(plan); - return swapOriginalModelsInPlan(plan, allNodes, modelsAccountingPlans); + return swapOriginalDeploymentsInPlan(plan, allNodes, modelsAccountingPlans); } - private AssignmentPlan swapOriginalModelsInPlan( + /** + * The method is responsible for reconstructing an AssignmentPlan + * by replacing the deployments and nodes in the given plan with their original counterparts. + * This ensures that the final plan uses the original objects while preserving the assignments + * and memory accounting from the input plan. + * + * @param plan AssignmentPlan to reconstruct with original models and nodes + * @param allNodes List of all nodes in the system, used to find original nodes + * @param planDeployments List of deployments in the plan, not the original deployments + * @return final plan with original models and nodes swapped in + */ + private AssignmentPlan swapOriginalDeploymentsInPlan( AssignmentPlan plan, List allNodes, List planDeployments ) { - final Map originalModelById = deployments.stream() + final Map originalDeploymentsById = deployments.stream() .collect(Collectors.toMap(AssignmentPlan.Deployment::deploymentId, Function.identity())); final Map originalNodeById = allNodes.stream().collect(Collectors.toMap(Node::id, Function.identity())); - AssignmentPlan.Builder planBuilder = AssignmentPlan.builder(allNodes, deployments); - for (AssignmentPlan.Deployment m : planDeployments) { - AssignmentPlan.Deployment originalDeployment = originalModelById.get(m.deploymentId()); - Map nodeAssignments = plan.assignments(m).orElse(Map.of()); + AssignmentPlan.Builder finalPlanBuilder = AssignmentPlan.builder(allNodes, deployments); + + for (AssignmentPlan.Deployment planDeployment : planDeployments) { + AssignmentPlan.Deployment originalDeployment = originalDeploymentsById.get(planDeployment.deploymentId()); + Map nodeAssignments = plan.assignments(planDeployment).orElse(Map.of()); for (Map.Entry assignment : nodeAssignments.entrySet()) { Node originalNode = originalNodeById.get(assignment.getKey().id()); - planBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); - if (originalDeployment.currentAllocationsByNodeId().containsKey(originalNode.id())) { - // TODO (#101612) requiredMemory should be calculated by the AssignmentPlan.Builder - // As the node has all its available memory we need to manually account memory of models with - // current allocations. - long requiredMemory = originalDeployment.estimateMemoryUsageBytes( - originalDeployment.currentAllocationsByNodeId().get(originalNode.id()) - ); - planBuilder.accountMemory(m, originalNode, requiredMemory); - } + finalPlanBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); } } - return planBuilder.build(); + return finalPlanBuilder.build(); } private Map> mergeAllocationsByNodeIdByDeploymentId(List plans) { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java index a2b321a332ae1..b873493100798 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment; import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignmentMetadata; import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan; import org.elasticsearch.xpack.ml.job.NodeLoad; import java.util.ArrayList; @@ -28,6 +29,8 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.Function; +import java.util.stream.Collectors; import static org.hamcrest.Matchers.aMapWithSize; import static org.hamcrest.Matchers.anEmptyMap; @@ -1127,6 +1130,74 @@ public void testRebalance_GivenFirstModelToAdd_GivenScalingProcessorSetting() { assertThat(assignment.getReason().isPresent(), is(false)); } + public void testCopyAssignments() { + // Create test nodes + AssignmentPlan.Node node1 = new AssignmentPlan.Node("node-1", ByteSizeValue.ofGb(1).getBytes(), 4); + AssignmentPlan.Node node2 = new AssignmentPlan.Node("node-2", ByteSizeValue.ofGb(1).getBytes(), 8); + List nodes = List.of(node1, node2); + + // Create test deployments + AssignmentPlan.Deployment deployment1 = new AssignmentPlan.Deployment( + "deployment-1", + "model-1", + ByteSizeValue.ofMb(100).getBytes(), + 2, + 1, + Map.of(), + 0, + null, + Priority.NORMAL, + 0, + 0 + ); + AssignmentPlan.Deployment deployment2 = new AssignmentPlan.Deployment( + "deployment-2", + "model-2", + ByteSizeValue.ofMb(100).getBytes(), + 1, + 2, + Map.of(), + 0, + null, + Priority.LOW, + 0, + 0 + ); + List deployments = List.of(deployment1, deployment2); + + // Create source plan and assign models to nodes + AssignmentPlan.Builder sourceBuilder = AssignmentPlan.builder(nodes, deployments); + sourceBuilder.assignModelToNode(deployment1, node1, 1); + sourceBuilder.assignModelToNode(deployment1, node2, 1); + sourceBuilder.assignModelToNode(deployment2, node2, 1); + AssignmentPlan source = sourceBuilder.build(); + + // Create destination plan + AssignmentPlan.Builder dest = AssignmentPlan.builder(nodes, deployments); + + // Create map of node IDs to original nodes + Map originalNodeById = nodes.stream() + .collect(Collectors.toMap(AssignmentPlan.Node::id, Function.identity())); + + // Call copyAssignments + TrainedModelAssignmentRebalancer.copyAssignments(source, dest, originalNodeById); + + // Build the destination plan + AssignmentPlan result = dest.build(); + + // Verify assignments + Optional> deployment1Assignments = result.assignments(deployment1); + assertThat(deployment1Assignments.isPresent(), is(true)); + assertThat(deployment1Assignments.get().size(), equalTo(2)); + assertThat(deployment1Assignments.get().get(node1), equalTo(1)); + assertThat(deployment1Assignments.get().get(node2), equalTo(1)); + + Optional> deployment2Assignments = result.assignments(deployment2); + assertThat(deployment2Assignments.isPresent(), is(true)); + assertThat(deployment2Assignments.get().size(), equalTo(1)); + assertThat(deployment2Assignments.get().get(node2), equalTo(1)); + } + private static StartTrainedModelDeploymentAction.TaskParams lowPriorityParams(String deploymentId, long modelSize) { return lowPriorityParams(deploymentId, deploymentId, modelSize); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java index 93e247dc898c4..c7f166a19bb69 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java @@ -16,6 +16,7 @@ import java.util.Map; import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; @@ -131,7 +132,7 @@ public void testAssignModelToNode_GivenNewPlanSatisfiesCurrentAssignment() { builder.assignModelToNode(m, n, 1); assertThat(builder.getRemainingCores(n), equalTo(2)); - assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(350).getBytes())); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(50).getBytes())); assertThat(builder.getRemainingAllocations(m), equalTo(1)); assertThat(builder.getRemainingThreads(m), equalTo(2)); @@ -160,7 +161,7 @@ public void testAssignModelToNode_GivenNewPlanSatisfiesCurrentAssignment() { builder.assignModelToNode(m, n, 1); assertThat(builder.getRemainingCores(n), equalTo(2)); - assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(325).getBytes())); + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(0).getBytes())); assertThat(builder.getRemainingAllocations(m), equalTo(1)); assertThat(builder.getRemainingThreads(m), equalTo(2)); @@ -184,7 +185,9 @@ public void testAssignModelToNode_GivenNewPlanDoesNotSatisfyCurrentAssignment() builder.assignModelToNode(m, n, 1); assertThat(builder.getRemainingCores(n), equalTo(2)); - assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(300).getBytes())); + // Since perDeployment memory is not specified, we compute the base memory usage. + // The remaining memory is 300MB - (240 MB + 2*30 MB) = 0MB + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(0).getBytes())); assertThat(builder.getRemainingAllocations(m), equalTo(1)); assertThat(builder.getRemainingThreads(m), equalTo(2)); @@ -214,7 +217,11 @@ public void testAssignModelToNode_GivenNewPlanDoesNotSatisfyCurrentAssignment() builder.assignModelToNode(m, n, 1); assertThat(builder.getRemainingCores(n), equalTo(2)); - assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(275).getBytes())); + // base memory: 240+2*25 = 290MB + // since perDeployment memory is specified, we compute the new memory format usage: + // 250 (perDeployment) + 1*25 (perAllocation) + 25 (modelDefinition) = 300MB + // Then we take the maximum of 290 and 300, which is 300MB + assertThat(builder.getRemainingMemory(n), equalTo(ByteSizeValue.ofMb(0).getBytes())); assertThat(builder.getRemainingAllocations(m), equalTo(1)); assertThat(builder.getRemainingThreads(m), equalTo(2)); @@ -254,12 +261,8 @@ public void testAssignModelToNode_GivenPreviouslyAssignedModelDoesNotFit() { AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - builder.assignModelToNode(m, n, 2); - AssignmentPlan plan = builder.build(); - - assertThat(plan.deployments(), contains(m)); - assertThat(plan.satisfiesCurrentAssignments(), is(true)); - assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 2))); + Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 2)); + assertThat(e.getMessage(), containsString("not enough memory on node")); } { // new memory format Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); @@ -278,12 +281,8 @@ public void testAssignModelToNode_GivenPreviouslyAssignedModelDoesNotFit() { AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - builder.assignModelToNode(m, n, 2); - AssignmentPlan plan = builder.build(); - - assertThat(plan.deployments(), contains(m)); - assertThat(plan.satisfiesCurrentAssignments(), is(true)); - assertThat(plan.assignments(m).get(), equalTo(Map.of(n, 2))); + Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 2)); + assertThat(e.getMessage(), containsString("not enough memory on node")); } } @@ -384,7 +383,9 @@ public void testCanAssign_GivenPreviouslyAssignedModelDoesNotFit() { // old memory format Deployment m = new Deployment("m_1", "m_1", ByteSizeValue.ofMb(31).getBytes(), 1, 1, Map.of("n_1", 1), 0, null, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - assertThat(builder.canAssign(m, n, 1), is(true)); + // 240 + 2*31 = 302MB, this doesn't fit in 300MB. We don't care that the deployment is currently allocated since + // only previous assignments should be considered + assertThat(builder.canAssign(m, n, 1), is(false)); } { // new memory format @@ -397,11 +398,15 @@ public void testCanAssign_GivenPreviouslyAssignedModelDoesNotFit() { Map.of("n_1", 1), 0, null, - ByteSizeValue.ofMb(300).getBytes(), + ByteSizeValue.ofMb(265).getBytes(), ByteSizeValue.ofMb(10).getBytes() ); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); + // 265 + 1*10 + 25 = 300MB, this doesn't fit in 300MB. We don't care that the deployment is currently allocated since assertThat(builder.canAssign(m, n, 1), is(true)); + builder.assignModelToNode(m, n, 1); + // After assignment, no more memory is available + assertThat(builder.canAssign(m, n, 1), is(false)); } }