[bugfix] Fix qwen3 coder template (#6409)

Jintao-Huang · Jintao-Huang · commit aa368293005c · 2025-11-03T17:19:31.000+08:00
diff --git a/README.md b/README.md
@@ -127,16 +127,16 @@ Running Environment:
 |--------------|--------------|---------------------|-------------------------------------------|
 | python       | >=3.9        | 3.10/3.11                |                                           |
 | cuda         |              | cuda12              | No need to install if using CPU, NPU, MPS |
-| torch        | >=2.0        | 2.7.1               |                                           |
-| transformers | >=4.33       | 4.56.2              |                                           |
+| torch        | >=2.0        | 2.8.0               |                                           |
+| transformers | >=4.33       | 4.57.1              |                                           |
 | modelscope   | >=1.23       |                     |                                           |
 | peft         | >=0.11,<0.18 |                     |                                           |
 | flash_attn   |              | 2.8.1/3.0.0b1 |                                           |
-| trl          | >=0.15,<0.24 | 0.20.0              | RLHF                                      |
-| deepspeed    | >=0.14       | 0.17.5              | Training                                  |
-| vllm         | >=0.5.1      | 0.10.1.1                | Inference/Deployment                      |
-| sglang       | >=0.4.6      | 0.4.10.post2         | Inference/Deployment                      |
-| lmdeploy     | >=0.5   | 0.9.2.post1                 | Inference/Deployment                      |
+| trl          | >=0.15,<0.24 | 0.23.1              | RLHF                                      |
+| deepspeed    | >=0.14       | 0.17.6              | Training                                  |
+| vllm         | >=0.5.1      | 0.11.0                | Inference/Deployment                      |
+| sglang       | >=0.4.6      | 0.5.4.post2         | Inference/Deployment                      |
+| lmdeploy     | >=0.5   | 0.10.2                 | Inference/Deployment                      |
 | evalscope    | >=1.0       |                     | Evaluation                                |
 | gradio       |              | 5.32.1              | Web-UI/App                                |
 
diff --git a/README_CN.md b/README_CN.md
@@ -123,16 +123,16 @@ pip install -e .
 |--------------|--------------|---------------------|--------------------|
 | python       | >=3.9        | 3.10/3.11            |                    |
 | cuda         |              | cuda12              | 使用cpu、npu、mps则无需安装 |
-| torch        | >=2.0        | 2.7.1               |                    |
-| transformers | >=4.33       | 4.56.2              |                    |
+| torch        | >=2.0        | 2.8.0               |                    |
+| transformers | >=4.33       | 4.57.1              |                    |
 | modelscope   | >=1.23       |                     |                    |
 | peft         | >=0.11,<0.18 |                     |                    |
 | flash_attn   |              | 2.8.1/3.0.0b1 |                    |
-| trl          | >=0.15,<0.24 | 0.20.0              | RLHF               |
-| deepspeed    | >=0.14       | 0.17.5              | 训练                 |
-| vllm         | >=0.5.1      | 0.10.1.1                | 推理/部署              |
-| sglang       | >=0.4.6      | 0.4.10.post2         | 推理/部署              |
-| lmdeploy     | >=0.5   | 0.9.2.post1                 | 推理/部署              |
+| trl          | >=0.15,<0.24 | 0.23.1              | RLHF               |
+| deepspeed    | >=0.14       | 0.17.6              | 训练                 |
+| vllm         | >=0.5.1      | 0.11.0                | 推理/部署              |
+| sglang       | >=0.4.6      | 0.5.4.post2         | 推理/部署              |
+| lmdeploy     | >=0.5   | 0.10.2                 | 推理/部署              |
 | evalscope    | >=1.0       |                     | 评测                 |
 | gradio       |              | 5.32.1              | Web-UI/App         |
 
diff --git a/docs/source/BestPractices/Qwen3-VL最佳实践.md b/docs/source/BestPractices/Qwen3-VL最佳实践.md
@@ -168,7 +168,7 @@ Overall, this is a sweet, lighthearted video that showcases the innocence and im
 
 Qwen3-VL的bbox输出采用归一化1000的相对坐标。你可以使用 ms-swift 提供的 grounding 数据集格式，其中"bbox"中的坐标为绝对坐标，ms-swift 会自动将绝对坐标转为归一化1000的相对坐标。更多信息请参考[grounding数据集格式文档](../Customization/自定义数据集.md#grounding)。
 ```jsonl
-{"messages": [{"role": "user", "content": "<image>找到图像中的<ref-object>"}, {"role": "assistant", "content": "[\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n]"}], "images": ["cat.png"], "objects": {"ref": ["羊", "羊", "羊"], "bbox": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}
+{"messages": [{"role": "user", "content": "<image>找到图像中的<ref-object>"}, {"role": "assistant", "content": "[\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"},\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n]"}], "images": ["cat.png"], "objects": {"ref": ["羊", "羊", "羊"], "bbox": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}
 ```
 
 ### Dense模型
diff --git a/docs/source/Customization/自定义数据集.md b/docs/source/Customization/自定义数据集.md
@@ -223,7 +223,7 @@ alpaca格式:
 
 对于Qwen2.5-VL/Qwen3-VL，你可以使用环境`QWENVL_BBOX_FORMAT='new'`（默认为'legacy'，需"ms-swift>=3.9.1"），以兼容[官方cookbook](https://github.com/QwenLM/Qwen3-VL/blob/main/cookbooks/2d_grounding.ipynb)格式。并将数据集定义成以下格式：
 ```jsonl
-{"messages": [{"role": "user", "content": "<image>找到图像中的<ref-object>"}, {"role": "assistant", "content": "[\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n]"}], "images": ["cat.png"], "objects": {"ref": ["羊", "羊", "羊"], "bbox": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}
+{"messages": [{"role": "user", "content": "<image>找到图像中的<ref-object>"}, {"role": "assistant", "content": "[\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"},\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n]"}], "images": ["cat.png"], "objects": {"ref": ["羊", "羊", "羊"], "bbox": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}
 ```
 
 测试ms-swift格式的grounding数据格式的最终格式：
diff --git a/docs/source/GetStarted/SWIFT安装.md b/docs/source/GetStarted/SWIFT安装.md
@@ -103,16 +103,16 @@ modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu2
 |--------------|--------------|---------------------|--------------------|
 | python       | >=3.9        | 3.10/3.11                |                    |
 | cuda         |              | cuda12              | 使用cpu、npu、mps则无需安装 |
-| torch        | >=2.0        | 2.7.1               |                    |
-| transformers | >=4.33       | 4.56.2              |                    |
+| torch        | >=2.0        | 2.8.0               |                    |
+| transformers | >=4.33       | 4.57.1              |                    |
 | modelscope   | >=1.23       |                     |                    |
 | peft         | >=0.11,<0.18 |                     |                    |
 | flash_attn   |              | 2.8.1/3.0.0b1 |                    |
-| trl          | >=0.15,<0.24 | 0.20.0              | RLHF               |
-| deepspeed    | >=0.14       | 0.17.5              | 训练                 |
-| vllm         | >=0.5.1      | 0.10.1.1                | 推理/部署              |
-| sglang       | >=0.4.6      | 0.4.10.post2         | 推理/部署              |
-| lmdeploy     | >=0.5   | 0.9.2.post1                 | 推理/部署              |
+| trl          | >=0.15,<0.24 | 0.23.1              | RLHF               |
+| deepspeed    | >=0.14       | 0.17.6              | 训练                 |
+| vllm         | >=0.5.1      | 0.11.0                | 推理/部署              |
+| sglang       | >=0.4.6      | 0.5.4.post2         | 推理/部署              |
+| lmdeploy     | >=0.5   | 0.10.2                 | 推理/部署              |
 | evalscope    | >=1.0       |                     | 评测                 |
 | gradio       |              | 5.32.1              | Web-UI/App         |
 
diff --git a/docs/source/Megatron-SWIFT/快速开始.md b/docs/source/Megatron-SWIFT/快速开始.md
@@ -62,12 +62,12 @@ modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu2
 |--------------|--------------|-------------|--------------------|
 | python       | >=3.9        | 3.10/3.11        |                    |
 | cuda         |              | cuda12      |                    |
-| torch        | >=2.0        | 2.6.0/2.7.1       |                    |
+| torch        | >=2.0        | 2.7.1/2.8.0       |                    |
 | transformer_engine    | >=2.3       |          |                  |
 | apex |   |  0.1 | |
-| megatron_core    | >=0.12       | 0.13      |                  |
+| megatron_core    |        | 0.13      |                  |
 | flash_attn    |        | 2.8.1/3.0.0b1   |                  |
-| transformers | >=4.33       | 4.56.2      |                    |
+| transformers | >=4.33       | 4.57.1      |                    |
 | modelscope   | >=1.23       |             |                    |
 | peft         | >=0.11,<0.18 |             |      LoRA          |
 | trl          | >=0.15,<0.24 |       |      RLHF        |
diff --git a/docs/source_en/BestPractices/Qwen3-VL-Best-Practice.md b/docs/source_en/BestPractices/Qwen3-VL-Best-Practice.md
@@ -167,7 +167,7 @@ If you need to fine-tune the model with a custom dataset, you can prepare the da
 Qwen3-VL's bbox output uses normalized 1000 relative coordinates. You can use the grounding dataset format provided by ms-swift, where the coordinates in "bbox" are absolute coordinates, and ms-swift will automatically convert absolute coordinates to normalized 1000 relative coordinates. For more information, please refer to the [Grounding Dataset Format Documentation](../Customization/Custom-dataset.md#grounding).
 
 ```jsonl
-{"messages": [{"role": "user", "content": "<image>Locate the <ref-object> in the image"}, {"role": "assistant", "content": "[\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n]"}], "images": ["cat.png"], "objects": {"ref": ["sheep", "sheep", "sheep"], "bbox": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}
+{"messages": [{"role": "user", "content": "<image>Locate the <ref-object> in the image"}, {"role": "assistant", "content": "[\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"},\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n]"}], "images": ["cat.png"], "objects": {"ref": ["sheep", "sheep", "sheep"], "bbox": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}
 ```
 
 ### Dense Models
diff --git a/docs/source_en/Customization/Custom-dataset.md b/docs/source_en/Customization/Custom-dataset.md
@@ -238,7 +238,7 @@ The format will automatically convert the dataset format to the corresponding mo
 
 For Qwen2.5-VL/Qwen3-VL, you can set the environment variable `QWENVL_BBOX_FORMAT='new'` (default is `'legacy'`, requires "ms-swift>=3.9.1") to be compatible with the [official cookbook](https://github.com/QwenLM/Qwen3-VL/blob/main/cookbooks/2d_grounding.ipynb) format. Define your dataset in the following format:
 ```jsonl
-{"messages": [{"role": "user", "content": "<image>Locate the <ref-object> in the image"}, {"role": "assistant", "content": "[\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n]"}], "images": ["cat.png"], "objects": {"ref": ["sheep", "sheep", "sheep"], "bbox": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}
+{"messages": [{"role": "user", "content": "<image>Locate the <ref-object> in the image"}, {"role": "assistant", "content": "[\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"},\n\t{\"bbox_2d\": <bbox>, \"label\": \"<ref-object>\"}\n]"}], "images": ["cat.png"], "objects": {"ref": ["sheep", "sheep", "sheep"], "bbox": [[90.9, 160.8, 135, 212.8], [360.9, 480.8, 495, 532.8]]}}
 ```
 
 Testing the final format of the grounding data in ms-swift format:
diff --git a/docs/source_en/GetStarted/SWIFT-installation.md b/docs/source_en/GetStarted/SWIFT-installation.md
@@ -104,16 +104,16 @@ More images can be found [here](https://modelscope.cn/docs/intro/environment-set
 |--------------|--------------|---------------------|-------------------------------------------|
 | python       | >=3.9        | 3.10/3.11                |                                           |
 | cuda         |              | cuda12              | No need to install if using CPU, NPU, MPS |
-| torch        | >=2.0        | 2.7.1               |                                           |
-| transformers | >=4.33       | 4.56.2              |                                           |
+| torch        | >=2.0        | 2.8.0               |                                           |
+| transformers | >=4.33       | 4.57.1              |                                           |
 | modelscope   | >=1.23       |                     |                                           |
 | peft         | >=0.11,<0.18 |                     |                                           |
 | flash_attn   |              | 2.8.1 /3.0.0b1 |                                           |
-| trl          | >=0.15,<0.24 | 0.20.0              | RLHF                                      |
-| deepspeed    | >=0.14       | 0.17.5              | Training                                  |
-| vllm         | >=0.5.1      | 0.10.1.1                | Inference/Deployment                      |
-| sglang       | >=0.4.6      | 0.4.10.post2         | Inference/Deployment                      |
-| lmdeploy     | >=0.5   | 0.9.2.post1                 | Inference/Deployment                      |
+| trl          | >=0.15,<0.24 | 0.23.1              | RLHF                                      |
+| deepspeed    | >=0.14       | 0.17.6              | Training                                  |
+| vllm         | >=0.5.1      | 0.11.0                | Inference/Deployment                      |
+| sglang       | >=0.4.6      | 0.5.4.post2         | Inference/Deployment                      |
+| lmdeploy     | >=0.5   | 0.10.2                 | Inference/Deployment                      |
 | evalscope    | >=1.0       |                     | Evaluation                                |
 | gradio       |              | 5.32.1              | Web-UI/App                                |
 
diff --git a/docs/source_en/Megatron-SWIFT/Quick-start.md b/docs/source_en/Megatron-SWIFT/Quick-start.md
@@ -62,12 +62,12 @@ Recommended Operating Environment:
 |--------------|--------------|-------------|--------------------|
 | python       | >=3.9        | 3.10/3.11        |                    |
 | cuda         |              | cuda12      |                    |
-| torch        | >=2.0        | 2.6.0/2.7.1    |                    |
+| torch        | >=2.0        | 2.7.1/2.8.0    |                    |
 | transformer_engine    | >=2.3       |         |                  |
 | apex |   |  0.1 | |
-| megatron_core    | >=0.12       | 0.13      |                  |
+| megatron_core    |        | 0.13      |                  |
 | flash_attn    |        | 2.8.1/3.0.0b1   |                  |
-| transformers | >=4.33       | 4.56.2      |                    |
+| transformers | >=4.33       | 4.57.1      |                    |
 | modelscope   | >=1.23       |             |                    |
 | peft         | >=0.11,<0.18 |             |      LoRA          |
 | trl          | >=0.15,<0.24 |       |      RLHF        |
diff --git a/swift/plugin/agent_template/qwen3_coder.py b/swift/plugin/agent_template/qwen3_coder.py
@@ -134,7 +134,7 @@ def _format_tools(self, tools: List[Union[str, dict]], system: Optional[str] = N
 
     def _format_tool_calls(self, tool_call_messages):
         result_parts = []
-        for message in tool_call_messages:
+        for idx, message in enumerate(tool_call_messages):
             tool_call = self._parse_tool_call(message['content'])
             result_parts.append(f"<tool_call>\n<function={tool_call['name']}>\n")
             # Processing parameters (if present)
@@ -151,6 +151,9 @@ def _format_tool_calls(self, tool_call_messages):
                     result_parts.append(f'{args_value}\n</parameter>\n')
             # Close tags
             result_parts.append('</function>\n</tool_call>')
+            # ref: https://github.com/QwenLM/Qwen3-Coder/blob/0ae30f55e9d6c47ff763c334f99c135ad68915dd/qwencoder-eval/tool_calling_eval/berkeley-function-call-leaderboard/bfcl_eval/model_handler/local_inference/qwen_fc.py#L21  # noqa
+            if idx != len(tool_call_messages) - 1:
+                result_parts.append('\n')
         return ''.join(result_parts)
 
     def _get_tool_responses(self, tool_messages):