-
-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[ResponsesAPI] Add GPTOSS MCP tool streaming #30301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8f24ea8
6907c90
cec41d7
2aa50bd
4152c0d
c2c1c87
3837925
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # SPDX-FileCopyrightText: Copyright contributors to the vLLM project | ||
| import importlib | ||
| import importlib.util | ||
| import json | ||
| import time | ||
|
|
||
|
|
@@ -43,6 +43,8 @@ def server(): | |
| env_dict = dict( | ||
| VLLM_ENABLE_RESPONSES_API_STORE="1", | ||
| PYTHON_EXECUTION_BACKEND="dangerously_use_uv", | ||
| VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS="code_interpreter,container,web_search_preview", | ||
| VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS="1", | ||
| ) | ||
|
|
||
| with RemoteOpenAIServer(MODEL_NAME, args, env_dict=env_dict) as remote_server: | ||
|
|
@@ -850,6 +852,186 @@ async def test_function_calling_with_stream(client: OpenAI, model_name: str): | |
| assert event.response.output_text is not None | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| @pytest.mark.parametrize("model_name", [MODEL_NAME]) | ||
| async def test_mcp_code_interpreter_streaming(client: OpenAI, model_name: str, server): | ||
| tools = [ | ||
| { | ||
| "type": "mcp", | ||
| "server_label": "code_interpreter", | ||
| } | ||
| ] | ||
| input_text = ( | ||
| "Calculate 15 * 32 using python. " | ||
| "The python interpreter is not stateful and you must print to see the output." | ||
| ) | ||
|
|
||
| stream_response = await client.responses.create( | ||
| model=model_name, | ||
| input=input_text, | ||
| tools=tools, | ||
| stream=True, | ||
| temperature=0.0, | ||
| instructions=( | ||
| "You must use the Python tool to execute code. Never simulate execution." | ||
| ), | ||
| ) | ||
|
|
||
| mcp_call_added = False | ||
| mcp_call_in_progress = False | ||
| mcp_arguments_delta_seen = False | ||
| mcp_arguments_done = False | ||
| mcp_call_completed = False | ||
| mcp_item_done = False | ||
|
|
||
| code_interpreter_events_seen = False | ||
|
|
||
| async for event in stream_response: | ||
| if "code_interpreter" in event.type: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| code_interpreter_events_seen = True | ||
|
|
||
| if event.type == "response.output_item.added": | ||
| if hasattr(event.item, "type") and event.item.type == "mcp_call": | ||
| mcp_call_added = True | ||
| assert event.item.name == "python" | ||
| assert event.item.server_label == "code_interpreter" | ||
|
|
||
| elif event.type == "response.mcp_call.in_progress": | ||
| mcp_call_in_progress = True | ||
|
|
||
| elif event.type == "response.mcp_call_arguments.delta": | ||
| mcp_arguments_delta_seen = True | ||
| assert event.delta is not None | ||
|
|
||
| elif event.type == "response.mcp_call_arguments.done": | ||
| mcp_arguments_done = True | ||
| assert event.name == "python" | ||
| assert event.arguments is not None | ||
|
|
||
| elif event.type == "response.mcp_call.completed": | ||
| mcp_call_completed = True | ||
|
|
||
| elif ( | ||
| event.type == "response.output_item.done" | ||
| and hasattr(event.item, "type") | ||
| and event.item.type == "mcp_call" | ||
| ): | ||
| mcp_item_done = True | ||
| assert event.item.name == "python" | ||
| assert event.item.status == "completed" | ||
|
|
||
| assert mcp_call_added, "MCP call was not added" | ||
| assert mcp_call_in_progress, "MCP call in_progress event not seen" | ||
| assert mcp_arguments_delta_seen, "MCP arguments delta event not seen" | ||
| assert mcp_arguments_done, "MCP arguments done event not seen" | ||
| assert mcp_call_completed, "MCP call completed event not seen" | ||
| assert mcp_item_done, "MCP item done event not seen" | ||
|
|
||
| assert not code_interpreter_events_seen, ( | ||
| "Should not see code_interpreter events when using MCP type" | ||
| ) | ||
|
Comment on lines
+930
to
+932
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Given the previous assertion on line 891, this assertion |
||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| @pytest.mark.parametrize("model_name", [MODEL_NAME]) | ||
| async def test_mcp_tool_multi_turn(client: OpenAI, model_name: str, server): | ||
| """Test MCP tool calling across multiple turns. | ||
|
|
||
| This test verifies that MCP tools work correctly in multi-turn conversations, | ||
| maintaining state across turns via the previous_response_id mechanism. | ||
| """ | ||
| tools = [ | ||
| { | ||
| "type": "mcp", | ||
| "server_label": "code_interpreter", | ||
| } | ||
| ] | ||
|
|
||
| # First turn - make a calculation | ||
| response1 = await client.responses.create( | ||
| model=model_name, | ||
| input="Calculate 50 * 60 using python and print the result.", | ||
| tools=tools, | ||
| temperature=0.0, | ||
| instructions=( | ||
| "You must use the Python tool to execute code. Never simulate execution." | ||
| ), | ||
| extra_body={"enable_response_messages": True}, | ||
| ) | ||
|
|
||
| assert response1 is not None | ||
| assert response1.status == "completed" | ||
|
|
||
| # Verify MCP call in first response by checking output_messages | ||
| tool_call_found = False | ||
| tool_response_found = False | ||
| for message in response1.output_messages: | ||
| recipient = message.get("recipient") | ||
| if recipient and recipient.startswith("python"): | ||
| tool_call_found = True | ||
|
|
||
| author = message.get("author", {}) | ||
| if ( | ||
| author.get("role") == "tool" | ||
| and author.get("name") | ||
| and author.get("name").startswith("python") | ||
| ): | ||
| tool_response_found = True | ||
|
|
||
| # Verify MCP tools were actually used | ||
| assert tool_call_found, "MCP tool call not found in output_messages" | ||
| assert tool_response_found, "MCP tool response not found in output_messages" | ||
|
|
||
| # Verify input messages: Should have system message with tool, NO developer message | ||
| developer_messages = [ | ||
| msg for msg in response1.input_messages if msg["author"]["role"] == "developer" | ||
| ] | ||
| assert len(developer_messages) == 0, ( | ||
| "No developer message expected for elevated tools" | ||
| ) | ||
|
|
||
| # Second turn - reference previous calculation | ||
| previous_messages = response1.input_messages + response1.output_messages | ||
|
|
||
| response2 = await client.responses.create( | ||
| model=model_name, | ||
| input="Now divide that result by 2.", | ||
| tools=tools, | ||
| temperature=0.0, | ||
| instructions=( | ||
| "You must use the Python tool to execute code. Never simulate execution." | ||
| ), | ||
| extra_body={ | ||
| "previous_input_messages": previous_messages, | ||
| "enable_response_messages": True, | ||
| }, | ||
| ) | ||
|
|
||
| assert response2 is not None | ||
| assert response2.status == "completed" | ||
|
|
||
| # Verify input messages are correct: should have two messages - | ||
| # one to the python recipient on analysis channel and one from tool role | ||
| mcp_recipient_messages = [] | ||
| tool_role_messages = [] | ||
| for msg in response2.input_messages: | ||
| if msg["author"]["role"] == "assistant": | ||
| # Check if this is a message to MCP recipient on analysis channel | ||
| if msg.get("channel") == "analysis" and msg.get("recipient"): | ||
| recipient = msg.get("recipient") | ||
| if recipient.startswith("code_interpreter") or recipient == "python": | ||
| mcp_recipient_messages.append(msg) | ||
| elif msg["author"]["role"] == "tool": | ||
| tool_role_messages.append(msg) | ||
|
|
||
| assert len(mcp_recipient_messages) > 0, ( | ||
| "Expected message(s) to MCP recipient on analysis channel" | ||
| ) | ||
| assert len(tool_role_messages) > 0, ( | ||
| "Expected message(s) from tool role after MCP call" | ||
| ) | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| @pytest.mark.parametrize("model_name", [MODEL_NAME]) | ||
| async def test_output_messages_enabled(client: OpenAI, model_name: str, server): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The logic for handling `delta` events might be flawed. If multiple `delta` events of the same type occur consecutively, only the first one is pushed to the stack, and subsequent ones are skipped (`continue`). However, `pairs_of_event_types` maps `delta` to `done` events, implying that each `done` event should correspond to a `delta` event. If intermediate `delta` events are skipped, the final `done` event might pop an incorrect `delta` event from the stack, leading to an assertion failure or an incorrect `len(stack_of_event_types) == 0` at the end. Consider pushing all `delta` events, or refining the stack logic to correctly match `done` events with their corresponding `delta` streams.
deltaevents might be flawed. If multipledeltaevents of the same type occur consecutively, only the first one is pushed to the stack, and subsequent ones are skipped (continue). However,pairs_of_event_typesmapsdeltatodoneevents, implying that eachdoneevent should correspond to adeltaevent. If intermediatedeltaevents are skipped, the finaldoneevent might pop an incorrectdeltaevent from the stack, leading to an assertion failure or an incorrectlen(stack_of_event_types) == 0at the end. Consider pushing alldeltaevents or refining the stack logic to correctly matchdoneevents with their correspondingdeltastreams.