elastic
diff --git a/docs/changelog/109667.yaml
Lines changed: 5 additions & 0 deletions b/docs/changelog/109667.yaml
Lines changed: 5 additions & 0 deletions
diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java
Lines changed: 1 addition & 0 deletions b/server/src/main/java/org/elasticsearch/TransportVersions.java
Lines changed: 1 addition & 0 deletions
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/CreateTrainedModelAssignmentAction.java
Lines changed: 20 additions & 3 deletions b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/CreateTrainedModelAssignmentAction.java
Lines changed: 20 additions & 3 deletions
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/StartTrainedModelDeploymentAction.java
Lines changed: 77 additions & 13 deletions b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/StartTrainedModelDeploymentAction.java
Lines changed: 77 additions & 13 deletions
@@ -0,0 +1,5 @@
+pr: 109667
+summary: Inference autoscaling
+area: Machine Learning
+type: feature
+issues: []
@@ -210,6 +210,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion VERSIONED_MASTER_NODE_REQUESTS = def(8_701_00_0);
     public static final TransportVersion ML_INFERENCE_AMAZON_BEDROCK_ADDED = def(8_702_00_0);
     public static final TransportVersion ML_INFERENCE_DONT_DELETE_WHEN_SEMANTIC_TEXT_EXISTS = def(8_703_00_0);
+    public static final TransportVersion INFERENCE_ADAPTIVE_ALLOCATIONS = def(8_704_00_0);
 
     /*
      * STOP! READ THIS FIRST! No, really,
 
@@ -7,6 +7,7 @@
 
 package org.elasticsearch.xpack.core.ml.action;
 
+import org.elasticsearch.TransportVersions;
 import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.ActionResponse;
 import org.elasticsearch.action.ActionType;
@@ -18,6 +19,7 @@
 import org.elasticsearch.xcontent.ToXContentObject;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
 import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
 
@@ -34,15 +36,22 @@ private CreateTrainedModelAssignmentAction() {
 
     public static class Request extends MasterNodeRequest<Request> {
         private final StartTrainedModelDeploymentAction.TaskParams taskParams;
+        private final AdaptiveAllocationsSettings adaptiveAllocationsSettings;
 
-        public Request(StartTrainedModelDeploymentAction.TaskParams taskParams) {
+        /**
+         * Creates a request with the default master node timeout.
+         *
+         * @param taskParams                  deployment task parameters; passed through {@code ExceptionsHelper.requireNonNull}
+         * @param adaptiveAllocationsSettings adaptive allocations settings; stored as given, no null check is applied here
+         */
+        public Request(StartTrainedModelDeploymentAction.TaskParams taskParams, AdaptiveAllocationsSettings adaptiveAllocationsSettings) {
             super(TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT);
             this.taskParams = ExceptionsHelper.requireNonNull(taskParams, "taskParams");
+            this.adaptiveAllocationsSettings = adaptiveAllocationsSettings;
         }
 
         public Request(StreamInput in) throws IOException {
             super(in);
             this.taskParams = new StartTrainedModelDeploymentAction.TaskParams(in);
+            // writeTo only emits the settings for INFERENCE_ADAPTIVE_ALLOCATIONS or later, so older streams yield null.
+            this.adaptiveAllocationsSettings = in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)
+                ? in.readOptionalWriteable(AdaptiveAllocationsSettings::new)
+                : null;
         }
 
         @Override
@@ -54,24 +63,32 @@ public ActionRequestValidationException validate() {
         public void writeTo(StreamOutput out) throws IOException {
             super.writeTo(out);
             taskParams.writeTo(out);
+            // Mirror of the StreamInput constructor: the optional settings are only put on the wire for
+            // transport versions at or after INFERENCE_ADAPTIVE_ALLOCATIONS.
+            final boolean sendAdaptiveAllocations = out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS);
+            if (sendAdaptiveAllocations) {
+                out.writeOptionalWriteable(adaptiveAllocationsSettings);
+            }
         }
 
         @Override
         public boolean equals(Object o) {
             if (this == o) return true;
             if (o == null || getClass() != o.getClass()) return false;
             Request request = (Request) o;
-            return Objects.equals(taskParams, request.taskParams);
+            // Both the task parameters and the adaptive allocations settings must match.
+            if (Objects.equals(taskParams, request.taskParams) == false) {
+                return false;
+            }
+            return Objects.equals(adaptiveAllocationsSettings, request.adaptiveAllocationsSettings);
         }
 
         @Override
         public int hashCode() {
-            return Objects.hash(taskParams);
+            // Hashes the same two fields that equals() compares.
+            return Objects.hash(taskParams, adaptiveAllocationsSettings);
         }
 
+        /** Returns the task parameters supplied at construction or read from the stream. */
         public StartTrainedModelDeploymentAction.TaskParams getTaskParams() {
             return taskParams;
         }
+
+        /**
+         * Returns the adaptive allocations settings held by this request; {@code null} when none were
+         * supplied or when the request was read from a stream older than INFERENCE_ADAPTIVE_ALLOCATIONS.
+         */
+        public AdaptiveAllocationsSettings getAdaptiveAllocationsSettings() {
+            return this.adaptiveAllocationsSettings;
+        }
     }
 
     public static class Response extends ActionResponse implements ToXContentObject {
 
@@ -29,8 +29,10 @@
 import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.xpack.core.ml.MlConfigVersion;
 import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
 import org.elasticsearch.xpack.core.ml.inference.assignment.AllocationStatus;
 import org.elasticsearch.xpack.core.ml.inference.assignment.Priority;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
 import org.elasticsearch.xpack.core.ml.job.messages.Messages;
 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
 import org.elasticsearch.xpack.core.ml.utils.MlTaskParams;
@@ -40,7 +42,6 @@
 import java.util.Optional;
 import java.util.concurrent.TimeUnit;
 
-import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
 import static org.elasticsearch.xpack.core.ml.MlTasks.trainedModelAssignmentTaskDescription;
 
 public class StartTrainedModelDeploymentAction extends ActionType<CreateTrainedModelAssignmentAction.Response> {
@@ -99,6 +100,7 @@ public static class Request extends MasterNodeRequest<Request> implements ToXCon
         public static final ParseField QUEUE_CAPACITY = TaskParams.QUEUE_CAPACITY;
         public static final ParseField CACHE_SIZE = TaskParams.CACHE_SIZE;
         public static final ParseField PRIORITY = TaskParams.PRIORITY;
+        public static final ParseField ADAPTIVE_ALLOCATIONS = TrainedModelAssignment.ADAPTIVE_ALLOCATIONS;
 
         public static final ObjectParser<Request, Void> PARSER = new ObjectParser<>(NAME, Request::new);
 
@@ -117,6 +119,12 @@ public static class Request extends MasterNodeRequest<Request> implements ToXCon
                 ObjectParser.ValueType.VALUE
             );
             PARSER.declareString(Request::setPriority, PRIORITY);
+            PARSER.declareObjectOrNull(
+                Request::setAdaptiveAllocationsSettings,
+                (p, c) -> AdaptiveAllocationsSettings.PARSER.parse(p, c).build(),
+                null,
+                ADAPTIVE_ALLOCATIONS
+            );
         }
 
         public static Request parseRequest(String modelId, String deploymentId, XContentParser parser) {
@@ -140,7 +148,8 @@ public static Request parseRequest(String modelId, String deploymentId, XContent
         private TimeValue timeout = DEFAULT_TIMEOUT;
         private AllocationStatus.State waitForState = DEFAULT_WAITFOR_STATE;
         private ByteSizeValue cacheSize;
-        private int numberOfAllocations = DEFAULT_NUM_ALLOCATIONS;
+        private Integer numberOfAllocations;
+        private AdaptiveAllocationsSettings adaptiveAllocationsSettings = null;
         private int threadsPerAllocation = DEFAULT_NUM_THREADS;
         private int queueCapacity = DEFAULT_QUEUE_CAPACITY;
         private Priority priority = DEFAULT_PRIORITY;
@@ -160,7 +169,11 @@ public Request(StreamInput in) throws IOException {
             modelId = in.readString();
             timeout = in.readTimeValue();
             waitForState = in.readEnum(AllocationStatus.State.class);
-            numberOfAllocations = in.readVInt();
+            if (in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) {
+                numberOfAllocations = in.readOptionalVInt();
+            } else {
+                numberOfAllocations = in.readVInt();
+            }
             threadsPerAllocation = in.readVInt();
             queueCapacity = in.readVInt();
             if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_4_0)) {
@@ -171,12 +184,16 @@ public Request(StreamInput in) throws IOException {
             } else {
                 this.priority = Priority.NORMAL;
             }
-
             if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_8_0)) {
                 this.deploymentId = in.readString();
             } else {
                 this.deploymentId = modelId;
             }
+            if (in.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) {
+                this.adaptiveAllocationsSettings = in.readOptionalWriteable(AdaptiveAllocationsSettings::new);
+            } else {
+                this.adaptiveAllocationsSettings = null;
+            }
         }
 
         public final void setModelId(String modelId) {
@@ -212,14 +229,34 @@ public Request setWaitForState(AllocationStatus.State waitForState) {
             return this;
         }
 
-        public int getNumberOfAllocations() {
+        /**
+         * Returns the explicitly configured number of allocations, or {@code null} when none has been set.
+         * Use {@link #computeNumberOfAllocations()} for a value that is never {@code null}.
+         */
+        public Integer getNumberOfAllocations() {
             return numberOfAllocations;
         }
 
-        public void setNumberOfAllocations(int numberOfAllocations) {
+        /**
+         * Resolves the number of allocations to use: the explicit value when one is set, otherwise the minimum
+         * from the adaptive allocations settings when present, otherwise {@code DEFAULT_NUM_ALLOCATIONS}.
+         */
+        public int computeNumberOfAllocations() {
+            if (numberOfAllocations != null) {
+                return numberOfAllocations;
+            }
+            if (adaptiveAllocationsSettings != null && adaptiveAllocationsSettings.getMinNumberOfAllocations() != null) {
+                return adaptiveAllocationsSettings.getMinNumberOfAllocations();
+            }
+            return DEFAULT_NUM_ALLOCATIONS;
+        }
+
+        /** Sets the explicit number of allocations; {@code null} leaves it unset. */
+        public void setNumberOfAllocations(Integer numberOfAllocations) {
             this.numberOfAllocations = numberOfAllocations;
         }
 
+        /** Returns the adaptive allocations settings of this request, or {@code null} when none are set. */
+        public AdaptiveAllocationsSettings getAdaptiveAllocationsSettings() {
+            return this.adaptiveAllocationsSettings;
+        }
+
+        /** Replaces the adaptive allocations settings of this request; the value is stored as given. */
+        public void setAdaptiveAllocationsSettings(AdaptiveAllocationsSettings adaptiveAllocationsSettings) {
+            this.adaptiveAllocationsSettings = adaptiveAllocationsSettings;
+        }
+
         public int getThreadsPerAllocation() {
             return threadsPerAllocation;
         }
@@ -258,7 +295,11 @@ public void writeTo(StreamOutput out) throws IOException {
             out.writeString(modelId);
             out.writeTimeValue(timeout);
             out.writeEnum(waitForState);
-            out.writeVInt(numberOfAllocations);
+            if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) {
+                out.writeOptionalVInt(numberOfAllocations);
+            } else {
+                // numberOfAllocations is a nullable Integer with no default; unboxing null for writeVInt would
+                // throw a NullPointerException. Older receivers read a mandatory VInt, so send the resolved
+                // value, which falls back to DEFAULT_NUM_ALLOCATIONS when nothing is configured.
+                out.writeVInt(computeNumberOfAllocations());
+            }
             out.writeVInt(threadsPerAllocation);
             out.writeVInt(queueCapacity);
             if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_4_0)) {
@@ -270,6 +311,9 @@ public void writeTo(StreamOutput out) throws IOException {
             if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_8_0)) {
                 out.writeString(deploymentId);
             }
+            if (out.getTransportVersion().onOrAfter(TransportVersions.INFERENCE_ADAPTIVE_ALLOCATIONS)) {
+                out.writeOptionalWriteable(adaptiveAllocationsSettings);
+            }
         }
 
         @Override
@@ -279,7 +323,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
             builder.field(DEPLOYMENT_ID.getPreferredName(), deploymentId);
             builder.field(TIMEOUT.getPreferredName(), timeout.getStringRep());
             builder.field(WAIT_FOR.getPreferredName(), waitForState);
-            builder.field(NUMBER_OF_ALLOCATIONS.getPreferredName(), numberOfAllocations);
+            if (numberOfAllocations != null) {
+                builder.field(NUMBER_OF_ALLOCATIONS.getPreferredName(), numberOfAllocations);
+            }
+            if (adaptiveAllocationsSettings != null) {
+                builder.field(ADAPTIVE_ALLOCATIONS.getPreferredName(), adaptiveAllocationsSettings);
+            }
             builder.field(THREADS_PER_ALLOCATION.getPreferredName(), threadsPerAllocation);
             builder.field(QUEUE_CAPACITY.getPreferredName(), queueCapacity);
             if (cacheSize != null) {
@@ -301,12 +350,25 @@ public ActionRequestValidationException validate() {
                         + Strings.arrayToCommaDelimitedString(VALID_WAIT_STATES)
                 );
             }
-            if (numberOfAllocations < 1) {
-                validationException.addValidationError("[" + NUMBER_OF_ALLOCATIONS + "] must be a positive integer");
+            if (numberOfAllocations != null) {
+                if (numberOfAllocations < 1) {
+                    validationException.addValidationError("[" + NUMBER_OF_ALLOCATIONS + "] must be a positive integer");
+                }
+                if (adaptiveAllocationsSettings != null && adaptiveAllocationsSettings.getEnabled()) {
+                    validationException.addValidationError(
+                        "[" + NUMBER_OF_ALLOCATIONS + "] cannot be set if adaptive allocations is enabled"
+                    );
+                }
             }
             if (threadsPerAllocation < 1) {
                 validationException.addValidationError("[" + THREADS_PER_ALLOCATION + "] must be a positive integer");
             }
+            ActionRequestValidationException autoscaleException = adaptiveAllocationsSettings == null
+                ? null
+                : adaptiveAllocationsSettings.validate();
+            if (autoscaleException != null) {
+                validationException.addValidationErrors(autoscaleException.validationErrors());
+            }
             if (threadsPerAllocation > MAX_THREADS_PER_ALLOCATION || isPowerOf2(threadsPerAllocation) == false) {
                 validationException.addValidationError(
                     "[" + THREADS_PER_ALLOCATION + "] must be a power of 2 less than or equal to " + MAX_THREADS_PER_ALLOCATION
@@ -322,7 +384,7 @@ public ActionRequestValidationException validate() {
                 validationException.addValidationError("[" + TIMEOUT + "] must be positive");
             }
             if (priority == Priority.LOW) {
-                if (numberOfAllocations > 1) {
+                if (numberOfAllocations != null && numberOfAllocations > 1) {
                     validationException.addValidationError("[" + NUMBER_OF_ALLOCATIONS + "] must be 1 when [" + PRIORITY + "] is low");
                 }
                 if (threadsPerAllocation > 1) {
@@ -344,6 +406,7 @@ public int hashCode() {
                 timeout,
                 waitForState,
                 numberOfAllocations,
+                adaptiveAllocationsSettings,
                 threadsPerAllocation,
                 queueCapacity,
                 cacheSize,
@@ -365,7 +428,8 @@ public boolean equals(Object obj) {
                 && Objects.equals(timeout, other.timeout)
                 && Objects.equals(waitForState, other.waitForState)
                 && Objects.equals(cacheSize, other.cacheSize)
-                && numberOfAllocations == other.numberOfAllocations
+                && Objects.equals(numberOfAllocations, other.numberOfAllocations)
+                && Objects.equals(adaptiveAllocationsSettings, other.adaptiveAllocationsSettings)
                 && threadsPerAllocation == other.threadsPerAllocation
                 && queueCapacity == other.queueCapacity
                 && priority == other.priority;
@@ -430,7 +494,7 @@ public static boolean mayAssignToNode(@Nullable DiscoveryNode node) {
             PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), THREADS_PER_ALLOCATION);
             PARSER.declareInt(ConstructingObjectParser.constructorArg(), QUEUE_CAPACITY);
             PARSER.declareField(
-                optionalConstructorArg(),
+                ConstructingObjectParser.optionalConstructorArg(),
                 (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), CACHE_SIZE.getPreferredName()),
                 CACHE_SIZE,
                 ObjectParser.ValueType.VALUE