diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py index 67a09f5c5d4..ff9156ca9f2 100644 --- a/fastdeploy/worker/gpu_model_runner.py +++ b/fastdeploy/worker/gpu_model_runner.py @@ -2387,7 +2387,7 @@ def profile_run(self) -> None: # 2. Dummy run self._dummy_run( num_tokens=self.scheduler_config.max_num_batched_tokens, - batch_size=self.scheduler_config.max_num_seqs, + batch_size=min(self.scheduler_config.max_num_seqs, 3), ) # 3. gc