From 42b0e207c17282588de3d4baac5f6cb007b5237f Mon Sep 17 00:00:00 2001 From: Ayakouji Date: Thu, 27 Nov 2025 16:05:39 +0800 Subject: [PATCH 1/3] update --- .../engine/sched/resource_manager_v1.py | 8 ++++--- fastdeploy/entrypoints/engine_client.py | 3 +++ fastdeploy/worker/gpu_model_runner.py | 22 ++++++++++++------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index 9f4873112d1..1faf54b0034 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -648,9 +648,11 @@ def _allocate_decode_and_extend(): break request = self.waiting[0] - if (self._is_mm_request(request) and self.exist_mm_prefill(scheduled_reqs)) or ( - paddle.is_compiled_with_xpu() and self.exist_prefill(scheduled_reqs) - ): + if ( + not envs.FD_ENABLE_MAX_PREFILL + and self._is_mm_request(request) + and self.exist_mm_prefill(scheduled_reqs) + ) or (paddle.is_compiled_with_xpu() and self.exist_prefill(scheduled_reqs)): break if request.status == RequestStatus.WAITING: result = self._waiting_async_process(request) diff --git a/fastdeploy/entrypoints/engine_client.py b/fastdeploy/entrypoints/engine_client.py index c29e6d7f672..4531081ecd9 100644 --- a/fastdeploy/entrypoints/engine_client.py +++ b/fastdeploy/entrypoints/engine_client.py @@ -47,6 +47,7 @@ ParameterError, StatefulSemaphore, api_server_logger, + to_tensor, ) @@ -401,6 +402,8 @@ def _send_task(self, task): if not self.enable_mm: self.zmq_client.send_json(task) else: + if envs.FD_ENABLE_E2W_TENSOR_CONVERT: + to_tensor([task]) self.zmq_client.send_pyobj(task) def valid_parameters(self, data): diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py index f824b48a46c..633d252f29f 100644 --- a/fastdeploy/worker/gpu_model_runner.py +++ b/fastdeploy/worker/gpu_model_runner.py @@ -424,12 +424,14 @@ def _apply_mm_inputs(self, request: Request, multi_vision_inputs: dict, rope_3d_ multi_vision_inputs["grid_thw_lst"].extend( inputs["grid_thw"][request.num_image_start : request.num_image_end] ) - multi_vision_inputs["cu_seqlens"].extend( - inputs["vit_seqlen"][request.num_image_start : request.num_image_end] - ) - multi_vision_inputs["vit_position_ids_lst"].extend( - inputs["vit_position_ids"][request.num_image_start : request.num_image_end] - ) + if hasattr(inputs, "vit_seqlen"): + multi_vision_inputs["cu_seqlens"].extend( + inputs["vit_seqlen"][request.num_image_start : request.num_image_end] + ) + if hasattr(inputs, "vit_position_ids"): + multi_vision_inputs["vit_seqlens"].extend( + inputs["vit_seqlen"][request.num_image_start : request.num_image_end] + ) else: vision_inputs = inputs if self.encoder_cache: @@ -2672,8 +2674,12 @@ def extract_vision_features_ernie(self, inputs: list[paddle.Tensor]) -> paddle.T def extract_vision_features_qwen(self, inputs: list[paddle.Tensor]) -> paddle.Tensor: assert inputs["images"] is not None - grid_thw = inputs["grid_thw"] - images = inputs["images"] + if envs.FD_ENABLE_MAX_PREFILL: + images = paddle.concat(inputs["images_lst"]).cast("bfloat16") + grid_thw = paddle.to_tensor(inputs["grid_thw_lst"], dtype="int64") + else: + grid_thw = inputs["grid_thw"] + images = inputs["images"] with paddle.amp.auto_cast( True, custom_black_list=self.amp_black, From b0fe8dbe7c4268bed5b54b54c3199745e31947a7 Mon Sep 17 00:00:00 2001 From: Ayakouji Date: Thu, 27 Nov 2025 16:19:55 +0800 Subject: [PATCH 2/3] fix Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- fastdeploy/worker/gpu_model_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py index 633d252f29f..de8f4dd756d 100644 --- a/fastdeploy/worker/gpu_model_runner.py +++ b/fastdeploy/worker/gpu_model_runner.py @@ -429,8 +429,8 @@ def _apply_mm_inputs(self, request: Request, multi_vision_inputs: dict, rope_3d_ inputs["vit_seqlen"][request.num_image_start : request.num_image_end] ) if hasattr(inputs, "vit_position_ids"): - multi_vision_inputs["vit_seqlens"].extend( - inputs["vit_seqlen"][request.num_image_start : request.num_image_end] + multi_vision_inputs["vit_position_ids_lst"].extend( + inputs["vit_position_ids"][request.num_image_start : request.num_image_end] ) else: vision_inputs = inputs From 1f7a79b70b10ea363b1bca16679a2939f0058636 Mon Sep 17 00:00:00 2001 From: Ayakouji Date: Thu, 4 Dec 2025 15:14:51 +0800 Subject: [PATCH 3/3] fix dict access --- fastdeploy/worker/gpu_model_runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py index b54d11b93a2..d4d74c55c4c 100644 --- a/fastdeploy/worker/gpu_model_runner.py +++ b/fastdeploy/worker/gpu_model_runner.py @@ -478,11 +478,11 @@ def _apply_mm_inputs(self, request: Request, multi_vision_inputs: dict, rope_3d_ multi_vision_inputs["grid_thw_lst"].extend( inputs["grid_thw"][request.num_image_start : request.num_image_end] ) - if hasattr(inputs, "vit_seqlen"): + if "vit_seqlen" in inputs: multi_vision_inputs["cu_seqlens"].extend( inputs["vit_seqlen"][request.num_image_start : request.num_image_end] ) - if hasattr(inputs, "vit_position_ids"): + if "vit_position_ids" in inputs: multi_vision_inputs["vit_position_ids_lst"].extend( inputs["vit_position_ids"][request.num_image_start : request.num_image_end] ) @@ -2739,11 +2739,11 @@ def extract_vision_features_ernie(self, inputs: list[paddle.Tensor]) -> paddle.T return image_features def extract_vision_features_qwen(self, inputs: list[paddle.Tensor]) -> paddle.Tensor: - assert inputs["images"] is not None if envs.FD_ENABLE_MAX_PREFILL: images = paddle.concat(inputs["images_lst"]).cast("bfloat16") grid_thw = paddle.to_tensor(inputs["grid_thw_lst"], dtype="int64") else: + assert inputs["images"] is not None grid_thw = inputs["grid_thw"] images = inputs["images"] with paddle.amp.auto_cast(