Skip to content

[ML] Flag updates from Inference #131725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ static TransportVersion def(int id) {
// Transport version ids gate wire-format changes; each new id must be strictly increasing.
public static final TransportVersion RERANK_SNIPPETS = def(9_130_0_00);
public static final TransportVersion PIPELINE_TRACKING_INFO = def(9_131_0_00);
public static final TransportVersion COMPONENT_TEMPLATE_TRACKING_INFO = def(9_132_0_00);
// Marks the change of UpdateTrainedModelDeploymentAction.Request from a boolean
// "isInternal" flag to the Source enum on the wire; nodes before this version
// still read/write the boolean form.
public static final TransportVersion INFERENCE_UPDATE_ML = def(9_133_0_00);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,17 @@
import java.io.IOException;
import java.util.Objects;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm missing a bit of context: why do we need to distinguish between these cases?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a corresponding Serverless PR?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, let me ping you with the internal documentation

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm missing a bit of context: why do we need to distinguish between these cases?

We need to allow updates to num_allocations in serverless that originate from the AdaptiveAllocationsScalerService (ADAPTIVE_ALLOCATIONS), but we want to disallow updates from users (API and INFERENCE). The only alternative I thought of was refactoring AdaptiveAllocationsScalerService to update directly rather than through the API, but that felt more intrusive.


import static org.elasticsearch.TransportVersions.INFERENCE_UPDATE_ML;
import static org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction.Request.ADAPTIVE_ALLOCATIONS;
import static org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction.Request.MODEL_ID;
import static org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction.Request.NUMBER_OF_ALLOCATIONS;

public class UpdateTrainedModelDeploymentAction extends ActionType<CreateTrainedModelAssignmentAction.Response> {
/**
 * Identifies where an update request originated. Replaces the previous boolean
 * "isInternal" flag so internal callers can be told apart from each other:
 * per the review discussion, num_allocations updates from the adaptive
 * allocations scaler must stay allowed in serverless, while user (API) and
 * inference-originated updates can be disallowed there.
 */
public enum Source {
    API, // external user request via the REST API (the default)
    ADAPTIVE_ALLOCATIONS, // internal update from AdaptiveAllocationsScalerService
    INFERENCE // internal update issued when updating an in-cluster inference endpoint
}

public static final UpdateTrainedModelDeploymentAction INSTANCE = new UpdateTrainedModelDeploymentAction();
public static final String NAME = "cluster:admin/xpack/ml/trained_models/deployment/update";
Expand Down Expand Up @@ -73,7 +79,7 @@ public static Request parseRequest(String deploymentId, XContentParser parser) {
private String deploymentId;
private Integer numberOfAllocations;
private AdaptiveAllocationsSettings adaptiveAllocationsSettings;
// Origin of this request; defaults to API so requests constructed without an
// explicit source are treated as user-originated (the most restrictive case).
private Source source = Source.API;

private Request() {
super(TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT, DEFAULT_ACK_TIMEOUT);
Expand All @@ -90,11 +96,17 @@ public Request(StreamInput in) throws IOException {
if (in.getTransportVersion().before(TransportVersions.V_8_16_0)) {
numberOfAllocations = in.readVInt();
adaptiveAllocationsSettings = null;
isInternal = false;
source = Source.API;
} else {
numberOfAllocations = in.readOptionalVInt();
adaptiveAllocationsSettings = in.readOptionalWriteable(AdaptiveAllocationsSettings::new);
isInternal = in.readBoolean();
if (in.getTransportVersion().before(INFERENCE_UPDATE_ML)) {
// we changed over from a boolean to an enum
// when it was a boolean, true came from adaptive allocations and false came from the rest api
source = in.readBoolean() ? Source.ADAPTIVE_ALLOCATIONS : Source.API;
} else {
source = in.readEnum(Source.class);
}
}
}

Expand All @@ -119,11 +131,15 @@ public void setAdaptiveAllocationsSettings(AdaptiveAllocationsSettings adaptiveA
}

public boolean isInternal() {
return isInternal;
return source == Source.INFERENCE || source == Source.ADAPTIVE_ALLOCATIONS;
}

public void setIsInternal(boolean isInternal) {
this.isInternal = isInternal;
public void setSource(Source source) {
this.source = source != null ? source : this.source;
}

public Source getSource() {
return source;
}

public AdaptiveAllocationsSettings getAdaptiveAllocationsSettings() {
Expand All @@ -139,7 +155,14 @@ public void writeTo(StreamOutput out) throws IOException {
} else {
out.writeOptionalVInt(numberOfAllocations);
out.writeOptionalWriteable(adaptiveAllocationsSettings);
out.writeBoolean(isInternal);
if (out.getTransportVersion().before(INFERENCE_UPDATE_ML)) {
// we changed over from a boolean to an enum
// when it was a boolean, true came from adaptive allocations and false came from the rest api
// treat "inference" as if it came from the api
out.writeBoolean(isInternal());
} else {
out.writeEnum(source);
}
}
}

Expand All @@ -161,10 +184,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
public ActionRequestValidationException validate() {
ActionRequestValidationException validationException = new ActionRequestValidationException();
if (numberOfAllocations != null) {
if (numberOfAllocations < 0 || (isInternal == false && numberOfAllocations == 0)) {
if (numberOfAllocations < 0 || (isInternal() == false && numberOfAllocations == 0)) {
validationException.addValidationError("[" + NUMBER_OF_ALLOCATIONS + "] must be a positive integer");
}
if (isInternal == false
if (isInternal() == false
&& adaptiveAllocationsSettings != null
&& adaptiveAllocationsSettings.getEnabled() == Boolean.TRUE) {
validationException.addValidationError(
Expand All @@ -183,7 +206,7 @@ public ActionRequestValidationException validate() {

@Override
public int hashCode() {
    // Must stay consistent with equals(): same four fields, including the request source.
    return Objects.hash(deploymentId, numberOfAllocations, adaptiveAllocationsSettings, source);
}

@Override
Expand All @@ -198,7 +221,7 @@ public boolean equals(Object obj) {
return Objects.equals(deploymentId, other.deploymentId)
&& Objects.equals(numberOfAllocations, other.numberOfAllocations)
&& Objects.equals(adaptiveAllocationsSettings, other.adaptiveAllocationsSettings)
&& isInternal == other.isInternal;
&& source == other.source;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ private void updateInClusterEndpoint(
var updateRequest = new UpdateTrainedModelDeploymentAction.Request(deploymentId);
updateRequest.setNumberOfAllocations(elasticServiceSettings.getNumAllocations());
updateRequest.setAdaptiveAllocationsSettings(elasticServiceSettings.getAdaptiveAllocationsSettings());
updateRequest.setIsInternal(true);
updateRequest.setSource(UpdateTrainedModelDeploymentAction.Source.INFERENCE);

var delegate = listener.<CreateTrainedModelAssignmentAction.Response>delegateFailure((l2, response) -> {
modelRegistry.updateModelTransaction(newModel, existingParsedModel, l2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ private void updateNumberOfAllocations(
) {
UpdateTrainedModelDeploymentAction.Request updateRequest = new UpdateTrainedModelDeploymentAction.Request(deploymentId);
updateRequest.setNumberOfAllocations(numberOfAllocations);
updateRequest.setIsInternal(true);
updateRequest.setSource(UpdateTrainedModelDeploymentAction.Source.ADAPTIVE_ALLOCATIONS);
ClientHelper.executeAsyncWithOrigin(
client,
ClientHelper.ML_ORIGIN,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ public void test_scaleUp() {
verify(client, times(1)).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
var updateRequest = new UpdateTrainedModelDeploymentAction.Request("test-deployment");
updateRequest.setNumberOfAllocations(2);
updateRequest.setIsInternal(true);
updateRequest.setSource(UpdateTrainedModelDeploymentAction.Source.ADAPTIVE_ALLOCATIONS);
verify(client, times(1)).execute(eq(UpdateTrainedModelDeploymentAction.INSTANCE), eq(updateRequest), any());
verifyNoMoreInteractions(client, clusterService);
reset(client, clusterService);
Expand Down Expand Up @@ -323,7 +323,7 @@ public void test_scaleDownToZero_whenNoRequests() {
verify(client, times(1)).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
var updateRequest = new UpdateTrainedModelDeploymentAction.Request("test-deployment");
updateRequest.setNumberOfAllocations(0);
updateRequest.setIsInternal(true);
updateRequest.setSource(UpdateTrainedModelDeploymentAction.Source.ADAPTIVE_ALLOCATIONS);
verify(client, times(1)).execute(eq(UpdateTrainedModelDeploymentAction.INSTANCE), eq(updateRequest), any());
verifyNoMoreInteractions(client, clusterService);

Expand Down Expand Up @@ -468,7 +468,7 @@ public void test_noScaleDownToZero_whenRecentlyScaledUpByOtherNode() {
verify(client, times(1)).execute(eq(GetDeploymentStatsAction.INSTANCE), any(), any());
var updateRequest = new UpdateTrainedModelDeploymentAction.Request("test-deployment");
updateRequest.setNumberOfAllocations(0);
updateRequest.setIsInternal(true);
updateRequest.setSource(UpdateTrainedModelDeploymentAction.Source.ADAPTIVE_ALLOCATIONS);
verify(client, times(1)).execute(eq(UpdateTrainedModelDeploymentAction.INSTANCE), eq(updateRequest), any());
verifyNoMoreInteractions(client, clusterService);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,9 @@
model_id: "missing-model"
body: >
{
"number_of_allocations": 4
"adaptive_allocations": {
"enabled": true,
"min_number_of_allocations": 0,
"max_number_of_allocations": 1
}
}