From 42b0e207c17282588de3d4baac5f6cb007b5237f Mon Sep 17 00:00:00 2001
From: Ayakouji <yuhongh@qq.com>
Date: Thu, 27 Nov 2025 16:05:39 +0800
Subject: [PATCH 1/3] Support FD_ENABLE_MAX_PREFILL for multimodal prefill

---
 .../engine/sched/resource_manager_v1.py       |  8 ++++---
 fastdeploy/entrypoints/engine_client.py       |  3 +++
 fastdeploy/worker/gpu_model_runner.py         | 22 ++++++++++++-------
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py
index 9f4873112d1..1faf54b0034 100644
--- a/fastdeploy/engine/sched/resource_manager_v1.py
+++ b/fastdeploy/engine/sched/resource_manager_v1.py
@@ -648,9 +648,11 @@ def _allocate_decode_and_extend():
                         break
 
                     request = self.waiting[0]
-                    if (self._is_mm_request(request) and self.exist_mm_prefill(scheduled_reqs)) or (
-                        paddle.is_compiled_with_xpu() and self.exist_prefill(scheduled_reqs)
-                    ):
+                    if (
+                        not envs.FD_ENABLE_MAX_PREFILL
+                        and self._is_mm_request(request)
+                        and self.exist_mm_prefill(scheduled_reqs)
+                    ) or (paddle.is_compiled_with_xpu() and self.exist_prefill(scheduled_reqs)):
                         break
                     if request.status == RequestStatus.WAITING:
                         result = self._waiting_async_process(request)
diff --git a/fastdeploy/entrypoints/engine_client.py b/fastdeploy/entrypoints/engine_client.py
index c29e6d7f672..4531081ecd9 100644
--- a/fastdeploy/entrypoints/engine_client.py
+++ b/fastdeploy/entrypoints/engine_client.py
@@ -47,6 +47,7 @@
     ParameterError,
     StatefulSemaphore,
     api_server_logger,
+    to_tensor,
 )
 
 
@@ -401,6 +402,8 @@ def _send_task(self, task):
         if not self.enable_mm:
             self.zmq_client.send_json(task)
         else:
+            if envs.FD_ENABLE_E2W_TENSOR_CONVERT:
+                to_tensor([task])
             self.zmq_client.send_pyobj(task)
 
     def valid_parameters(self, data):
diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index f824b48a46c..633d252f29f 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -424,12 +424,14 @@ def _apply_mm_inputs(self, request: Request, multi_vision_inputs: dict, rope_3d_
                 multi_vision_inputs["grid_thw_lst"].extend(
                     inputs["grid_thw"][request.num_image_start : request.num_image_end]
                 )
-                multi_vision_inputs["cu_seqlens"].extend(
-                    inputs["vit_seqlen"][request.num_image_start : request.num_image_end]
-                )
-                multi_vision_inputs["vit_position_ids_lst"].extend(
-                    inputs["vit_position_ids"][request.num_image_start : request.num_image_end]
-                )
+                if hasattr(inputs, "vit_seqlen"):
+                    multi_vision_inputs["cu_seqlens"].extend(
+                        inputs["vit_seqlen"][request.num_image_start : request.num_image_end]
+                    )
+                if hasattr(inputs, "vit_position_ids"):
+                    multi_vision_inputs["vit_seqlens"].extend(
+                        inputs["vit_seqlen"][request.num_image_start : request.num_image_end]
+                    )
             else:
                 vision_inputs = inputs
                 if self.encoder_cache:
@@ -2672,8 +2674,12 @@ def extract_vision_features_ernie(self, inputs: list[paddle.Tensor]) -> paddle.T
 
     def extract_vision_features_qwen(self, inputs: list[paddle.Tensor]) -> paddle.Tensor:
         assert inputs["images"] is not None
-        grid_thw = inputs["grid_thw"]
-        images = inputs["images"]
+        if envs.FD_ENABLE_MAX_PREFILL:
+            images = paddle.concat(inputs["images_lst"]).cast("bfloat16")
+            grid_thw = paddle.to_tensor(inputs["grid_thw_lst"], dtype="int64")
+        else:
+            grid_thw = inputs["grid_thw"]
+            images = inputs["images"]
         with paddle.amp.auto_cast(
             True,
             custom_black_list=self.amp_black,

From b0fe8dbe7c4268bed5b54b54c3199745e31947a7 Mon Sep 17 00:00:00 2001
From: Ayakouji <yuhongh@qq.com>
Date: Thu, 27 Nov 2025 16:19:55 +0800
Subject: [PATCH 2/3] Fix vit_position_ids key and source in _apply_mm_inputs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 fastdeploy/worker/gpu_model_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index 633d252f29f..de8f4dd756d 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -429,8 +429,8 @@ def _apply_mm_inputs(self, request: Request, multi_vision_inputs: dict, rope_3d_
                         inputs["vit_seqlen"][request.num_image_start : request.num_image_end]
                     )
                 if hasattr(inputs, "vit_position_ids"):
-                    multi_vision_inputs["vit_seqlens"].extend(
-                        inputs["vit_seqlen"][request.num_image_start : request.num_image_end]
+                    multi_vision_inputs["vit_position_ids_lst"].extend(
+                        inputs["vit_position_ids"][request.num_image_start : request.num_image_end]
                     )
             else:
                 vision_inputs = inputs

From 1f7a79b70b10ea363b1bca16679a2939f0058636 Mon Sep 17 00:00:00 2001
From: Ayakouji <yuhongh@qq.com>
Date: Thu, 4 Dec 2025 15:14:51 +0800
Subject: [PATCH 3/3] Use key membership for mm inputs; scope images assert

---
 fastdeploy/worker/gpu_model_runner.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index b54d11b93a2..d4d74c55c4c 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -478,11 +478,11 @@ def _apply_mm_inputs(self, request: Request, multi_vision_inputs: dict, rope_3d_
                 multi_vision_inputs["grid_thw_lst"].extend(
                     inputs["grid_thw"][request.num_image_start : request.num_image_end]
                 )
-                if hasattr(inputs, "vit_seqlen"):
+                if "vit_seqlen" in inputs:
                     multi_vision_inputs["cu_seqlens"].extend(
                         inputs["vit_seqlen"][request.num_image_start : request.num_image_end]
                     )
-                if hasattr(inputs, "vit_position_ids"):
+                if "vit_position_ids" in inputs:
                     multi_vision_inputs["vit_position_ids_lst"].extend(
                         inputs["vit_position_ids"][request.num_image_start : request.num_image_end]
                     )
@@ -2739,11 +2739,11 @@ def extract_vision_features_ernie(self, inputs: list[paddle.Tensor]) -> paddle.T
         return image_features
 
     def extract_vision_features_qwen(self, inputs: list[paddle.Tensor]) -> paddle.Tensor:
-        assert inputs["images"] is not None
         if envs.FD_ENABLE_MAX_PREFILL:
             images = paddle.concat(inputs["images_lst"]).cast("bfloat16")
             grid_thw = paddle.to_tensor(inputs["grid_thw_lst"], dtype="int64")
         else:
+            assert inputs["images"] is not None
             grid_thw = inputs["grid_thw"]
             images = inputs["images"]
         with paddle.amp.auto_cast(