Commits (19)
9d2442a  feat(tools): add ApplyPatchTool implementing OpenAI cookbook apply_pa…  (enyst, Nov 14, 2025)
c5a0836  chore(example): enable native tool calling (Responses API) for GPT-5.…  (enyst, Nov 14, 2025)
fc86fa7  ApplyPatch: Responses API name-only tool spec; accept 'patch' alias; …  (enyst, Nov 14, 2025)
3c429e1  ApplyPatch: accept 'patch' and expose minimal schema for Responses; f…  (enyst, Nov 14, 2025)
772ce28  Responses: do not echo assistant function_call or reasoning items in …  (enyst, Nov 14, 2025)
f08a048  Responses: include assistant function_call items but omit reasoning p…  (enyst, Nov 14, 2025)
9ae743f  Docs(dev): ApplyPatch + OpenAI Responses integration notes  (enyst, Nov 14, 2025)
02e67d3  Responses serialization: restore reasoning passthrough and keep assis…  (enyst, Nov 14, 2025)
a00128b  Tests: add Responses pairing test for function_call and function_call…  (enyst, Nov 14, 2025)
35e3be3  Examples+Docs: add FileEditor GPT-5.1 example and update ApplyPatch R…  (enyst, Nov 14, 2025)
e910dbf  ApplyPatch: include text output so Responses gets function_call_outpu…  (enyst, Nov 14, 2025)
6cd9639  Tests: add ApplyPatchExecutor tests for create/append/delete and path…  (enyst, Nov 14, 2025)
e772cbb  Docs: update ApplyPatch Responses notes with paired tool output examp…  (enyst, Nov 14, 2025)
0149040  Remove mistakenly committed FACTS.txt test artifact.  (enyst, Nov 14, 2025)
e8978ad  ApplyPatch: drop patch_text alias; use canonical 'patch' name only ac…  (enyst, Nov 14, 2025)
73f7f81  Docs: update ApplyPatch notes to canonicalize on 'patch' only; remove…  (enyst, Nov 14, 2025)
fab4f3b  Revert openhands.sdk.llm.message to main branch version (comments-onl…  (enyst, Nov 14, 2025)
db5b177  Example: simplify tool registration; add ApplyPatch like other tools …  (enyst, Nov 14, 2025)
74f0d15  ApplyPatch: align create() return type to Sequence; add docstrings an…  (enyst, Nov 15, 2025)
60 changes: 60 additions & 0 deletions docs/dev/apply_patch_responses_notes.md
@@ -0,0 +1,60 @@
# ApplyPatch + OpenAI Responses Integration Notes

Status: fixed and validated
Branch: feat/apply-patch-tool-gpt5-1
PR: https://github.com/OpenHands/software-agent-sdk/pull/1166

## Overview

We integrated an ApplyPatch tool modeled after OpenAI's cookbook for GPT-5.1 "server-known" tools. The SDK advertises a minimal function tool schema to nudge the model to include a `patch` argument while relying on OpenAI's server-side tool definitions.

## Key decisions

- ToolDefinition.to_responses_tool returns a minimal schema for the canonical `patch` argument.
- Example targets `openai/gpt-5.1-codex-mini` and uses the OPENAI_API_KEY from env.
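
A hypothetical sketch of the "minimal schema" shape described above; the SDK's actual `to_responses_tool` output may differ:

```python
# Assumed shape only: a function tool spec that names the canonical
# `patch` argument while leaving the real tool semantics to the server.
minimal_tool_spec = {
    "type": "function",
    "name": "apply_patch",
    "parameters": {
        "type": "object",
        "properties": {"patch": {"type": "string"}},
        "required": ["patch"],
    },
}
```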

## Responses pipeline adjustments

- Reasoning passthrough: we DO include the prior-turn `reasoning` item in input (test `test_assistant_includes_reasoning_passthrough` depends on this). It must not be the last input item; it should be followed by at least one other item (message or function_call), which our serializer ensures by ordering.
- Assistant tool calls: we include assistant `function_call` items in input and pair them with `function_call_output` items produced by tools in the same request. This satisfies the server's validation that an output must correspond to a previous call in the same input batch.
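
The two invariants above can be sketched with a hypothetical helper (not the SDK's serializer): reasoning items go first, and each `function_call` is immediately followed by its matching `function_call_output`.

```python
# Illustrative only; the SDK's ordering logic lives in its serializer.
def order_responses_input(items: list[dict]) -> list[dict]:
    reasoning = [i for i in items if i.get("type") == "reasoning"]
    calls = [i for i in items if i.get("type") == "function_call"]
    outputs = {
        i["call_id"]: i for i in items if i.get("type") == "function_call_output"
    }
    rest = [
        i
        for i in items
        if i.get("type") not in ("reasoning", "function_call", "function_call_output")
    ]
    # Reasoning first, then other items, so reasoning is never the last
    # input item as long as any other item exists.
    ordered = reasoning + rest
    for call in calls:
        ordered.append(call)
        if call["call_id"] in outputs:  # pair the output right after its call
            ordered.append(outputs[call["call_id"]])
    return ordered
```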

## Remaining issue

- We still observe a 400 "No tool call found for function call output with call_id ...". This suggests a mismatch between assistant function_call ids and our tool function_call_output call_id, or we failed to include the assistant call in the same input batch.
- Next steps: add tests for the Responses input assembly to ensure assistant function_call and tool function_call_output appear together and ids match.

## Cross-check with FileEditor

- Review FileEditor tool integration for execution and event serialization, ensuring ApplyPatch mirrors the same error-path handling (e.g., AgentErrorEvent on validation errors).

## Testing plan

- Unit tests for ApplyPatch executor: create/append/delete flows using minimal patches.
- Serialization tests for Responses: verify that given an assistant function_call and a tool observation, `format_messages_for_responses` outputs `function_call` then `function_call_output` with matching ids and no reasoning echoes.
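
The pairing property those serialization tests should assert can be sketched as a standalone checker (illustrative, not the SDK's test code):

```python
def check_call_output_pairing(items: list[dict]) -> bool:
    """Every function_call_output must reference a function_call that
    appeared earlier in the same Responses input batch."""
    seen: set[str] = set()
    for item in items:
        if item.get("type") == "function_call":
            seen.add(item["call_id"])
        elif item.get("type") == "function_call_output":
            if item["call_id"] not in seen:
                return False  # this is the condition that triggers the 400
    return True
```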

## Telemetry tips

- Enable `log_completions=True` to inspect requests/responses under `logs/completions/`.
- Compare call_id values across turns and ensure consistency.
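
As a rough aid for the second bullet, call_id values can be harvested from the logged JSON payloads. The directory name comes from the note above, but the payload layout assumed here is a guess, so treat this as a sketch:

```python
import json
from pathlib import Path


def collect_call_ids(log_dir: str = "logs/completions") -> set[str]:
    """Walk every JSON log file and gather all values stored under a
    'call_id' key, wherever they appear in the payload."""
    ids: set[str] = set()
    for path in Path(log_dir).glob("*.json"):
        stack = [json.loads(path.read_text())]
        while stack:
            node = stack.pop()
            if isinstance(node, dict):
                if "call_id" in node:
                    ids.add(node["call_id"])
                stack.extend(node.values())
            elif isinstance(node, list):
                stack.extend(node)
    return ids
```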

## Minimal paired example (ApplyPatch)

The Responses input array for a successful ApplyPatch turn includes:
- assistant function_call: name "apply_patch", arguments {"patch": "*** Begin Patch ... *** End Patch"}
- tool function_call_output: call_id equal to the assistant function_call's call_id; output contains the observation text, e.g., "Done!"

Example (abridged):

    [
      {"type": "function_call", "call_id": "fc_call_abc", "name": "apply_patch", "arguments": "{...}"},
      {"type": "function_call_output", "call_id": "fc_call_abc", "output": "Done!"}
    ]

This pairing is required by OpenAI; otherwise, a 400 error is returned.
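
For reference, a minimal `patch` payload for the create step might look like the following. This envelope is based on OpenAI's cookbook apply_patch format; verify the exact directives against the cookbook before relying on them:

```
*** Begin Patch
*** Add File: FACTS.txt
+OpenHands SDK integrates tools.
*** End Patch
```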

## FileEditor vs ApplyPatch

- Both tools now produce a text observation to ensure function_call_output is serialized.
- ApplyPatch is a server-known tool; we advertise a minimal schema (only name and a minimal parameters stub) to nudge the model to pass a 'patch' field.
- Telemetry now trims giant system instructions for readability and logs compact tool metadata.

82 changes: 82 additions & 0 deletions examples/01_standalone_sdk/28_apply_patch_with_gpt5_1.py
@@ -0,0 +1,82 @@
"""Example: Using ApplyPatch tool with GPT-5.1 models via direct OpenAI API.

This demonstrates adding a new ApplyPatch tool to the agent and guiding the
model to create, modify, and delete a FACTS.txt file using 'apply_patch' text.

Notes:
- Works with any GPT-5.1 family model (names start with "gpt-5.1").
- Uses direct OpenAI API through LiteLLM's LLM wrapper with no base_url.
- Requires OPENAI_API_KEY in the environment (or LLM_API_KEY fallback).
"""

from __future__ import annotations

import os

from pydantic import SecretStr

from openhands.sdk import LLM, Agent, Conversation, get_logger
from openhands.sdk.tool import Tool
from openhands.tools.apply_patch import ApplyPatchTool
from openhands.tools.task_tracker import TaskTrackerTool

# from openhands.tools.preset.default import register_default_tools
from openhands.tools.terminal import TerminalTool


logger = get_logger(__name__)

api_key = os.getenv("OPENAI_API_KEY") or os.getenv("LLM_API_KEY")
assert api_key, "Set OPENAI_API_KEY (or LLM_API_KEY) in your environment."

# Choose a GPT-5.1 model; mini is cost-effective for examples
default_model = "openai/gpt-5.1-codex-mini"
model = os.getenv("LLM_MODEL", default_model)
assert model.startswith("openai/gpt-5.1"), "Model must be an openai gpt-5.1 variant"

# Force Chat Completions path by using a non-Responses model alias if needed
if model.startswith("openai/gpt-5.1"):
    # Litellm treats 'openai/gpt-5.1' via Responses; to avoid the Responses tool-output
    # coupling for this example, we can strip the provider prefix for chat path.
    # However, leave as-is to try Responses first; if it errors, instruct user below.
    pass

llm = LLM(
    model=model,
    api_key=SecretStr(api_key),
    native_tool_calling=True,  # enable native tool calling (Responses API)
    reasoning_summary=None,  # avoid OpenAI org verification requirement
    log_completions=True,  # enable telemetry to log input/output payloads
)

# Explicitly register tool classes so Tool(name=...) can resolve
# They self-register into the global registry on import
_ = (TerminalTool, TaskTrackerTool, ApplyPatchTool)

agent = Agent(
    llm=llm,
    tools=[
        Tool(name="terminal"),
        Tool(name="task_tracker"),
        Tool(name="apply_patch"),
    ],
    system_prompt_kwargs={"cli_mode": True},
)

conversation = Conversation(agent=agent, workspace=os.getcwd())

# Compose instructions guiding the model to use the new tool
prompt = (
    "Use the ApplyPatch tool to: "
    "1) create a FACTS.txt with a single line 'OpenHands SDK integrates tools.'; "
    "2) modify FACTS.txt by appending a second line 'ApplyPatch works.'; "
    "3) delete FACTS.txt. "
    "Only use the apply_patch format between '*** Begin Patch' and '*** End Patch' "
    "when calling the tool."
)

conversation.send_message(prompt)
conversation.run()

print("Conversation finished.")
print(f"EXAMPLE_COST: {llm.metrics.accumulated_cost}")
63 changes: 63 additions & 0 deletions examples/01_standalone_sdk/29_file_editor_with_gpt5_1.py
@@ -0,0 +1,63 @@
"""Example: Using FileEditor tool with GPT-5.1 models via direct OpenAI API.

This mirrors the ApplyPatch example but uses FileEditor to create/modify/delete
FACTS.txt. Useful for comparing Responses input/output behavior and logs.

Requirements:
- OPENAI_API_KEY in the environment (or LLM_API_KEY)
- Model: any openai/gpt-5.1* variant; default uses openai/gpt-5.1-codex-mini
"""

from __future__ import annotations

import os

from pydantic import SecretStr

from openhands.sdk import LLM, Agent, Conversation, get_logger
from openhands.sdk.tool import Tool
from openhands.tools.file_editor import FileEditorTool
from openhands.tools.task_tracker import TaskTrackerTool
from openhands.tools.terminal import TerminalTool


logger = get_logger(__name__)

api_key = os.getenv("OPENAI_API_KEY") or os.getenv("LLM_API_KEY")
assert api_key, "Set OPENAI_API_KEY (or LLM_API_KEY) in your environment."

model = os.getenv("LLM_MODEL", "openai/gpt-5.1-codex-mini")
assert model.startswith("openai/gpt-5.1"), "Model must be an openai gpt-5.1 variant"

llm = LLM(
    model=model,
    api_key=SecretStr(api_key),
    native_tool_calling=True,
    reasoning_summary=None,
    log_completions=True,
)

# Ensure registration
_ = (TerminalTool, TaskTrackerTool, FileEditorTool)

agent = Agent(
    llm=llm,
    tools=[Tool(name="terminal"), Tool(name="task_tracker"), Tool(name="file_editor")],
    system_prompt_kwargs={"cli_mode": True},
)

conversation = Conversation(agent=agent, workspace=os.getcwd())

prompt = (
    "You must use tools to perform all actions. Do not merely describe actions. "
    "Use the FileEditor tool to: "
    "1) create a FACTS.txt with a single line 'OpenHands SDK integrates tools.'; "
    "2) modify FACTS.txt by appending a second line 'FileEditor works.'; "
    "3) delete FACTS.txt using the terminal tool with: rm FACTS.txt."
)

conversation.send_message(prompt)
conversation.run()

print("Conversation finished.")
print(f"EXAMPLE_COST: {llm.metrics.accumulated_cost}")
31 changes: 30 additions & 1 deletion openhands-sdk/openhands/sdk/llm/utils/telemetry.py
@@ -50,7 +50,24 @@ class Telemetry(BaseModel):
     # ---------- Lifecycle ----------
     def on_request(self, log_ctx: dict | None) -> None:
         self._req_start = time.time()
-        self._req_ctx = log_ctx or {}
+        # Trim heavy fields in request context for readability
+        ctx = log_ctx or {}
+        # Compact tools into minimal metadata if present
+        tools = ctx.get("tools")
+        if isinstance(tools, (list, tuple)):
+            compact_tools = []
+            for t in tools:
+                try:
+                    compact_tools.append(
+                        {
+                            "name": getattr(t, "name", getattr(t, "title", "")),
+                            "kind": t.__class__.__name__,
+                        }
+                    )
+                except Exception:
+                    compact_tools.append(str(t))
+            ctx["tools"] = compact_tools
+        self._req_ctx = ctx

     def on_response(
         self,
@@ -239,6 +256,18 @@ def log_llm_call(
             resp  # ModelResponse | ResponsesAPIResponse;
             # serialized via _safe_json
         )
+        # Omit extremely large system instructions from logs for readability
+        try:
+            if (
+                isinstance(data["response"], dict)
+                and "instructions" in data["response"]
+            ):
+                # Replace with trimmed preview and length
+                instr = data["response"].get("instructions") or ""
+                data["response"]["instructions_len"] = len(instr)
+                data["response"]["instructions"] = "[omitted]"
+        except Exception:
+            pass
         data["cost"] = float(cost or 0.0)
         data["timestamp"] = time.time()
         data["latency_sec"] = self._last_latency
4 changes: 4 additions & 0 deletions openhands-tools/openhands/tools/apply_patch/__init__.py
@@ -0,0 +1,4 @@
from .definition import ApplyPatchTool


__all__ = ["ApplyPatchTool"]