neulab · neubig · Oct 29, 2025 · Oct 29, 2025 · Oct 29, 2025
diff --git a/agents/openhands/system_prompt/tools/browser.py b/agents/openhands/system_prompt/tools/browser.py
@@ -53,7 +53,7 @@
 
         scroll(-50.2, -100.5)
 
-fill(bid: str, value: str)
+fill(bid: str, value: str, enable_autocomplete_menu: bool = False)
     Description: Fill out a form field. It focuses the element and triggers an input event with the entered text. It works for <input>, <textarea> and [contenteditable] elements.
     Examples:
         fill('237', 'example value')

diff --git a/datasets/toucan_1_5m/api.py b/datasets/toucan_1_5m/api.py
@@ -0,0 +1,14 @@
+def exa_search_web_search_exa(query: str, numResults: int = 5) -> dict:
+    """Search the web using Exa AI - performs real-time web searches and can scrape content from specific URLs.
+
+    Args:
+    ----
+        query (str): Search query
+        numResults (int): Number of search results to return (default: 5)
+
+    Returns:
+    -------
+        dict: Search results containing success status and results list
+
+    """
+    pass
diff --git a/datasets/toucan_1_5m/extract_raw.py b/datasets/toucan_1_5m/extract_raw.py
@@ -0,0 +1,28 @@
+import json
+
+from datasets import load_dataset
+
+# Load a small slice (first 5 rows) of the "Kimi-K2" config of Toucan-1.5M from Hugging Face.
+# The slice keeps sample generation fast; widen the split to export more rows.
+ds = load_dataset("Agent-Ark/Toucan-1.5M", "Kimi-K2", split="train[:5]")
+
+# Emit one JSON object per row on stdout (JSON Lines)
+for index, sample in enumerate(ds):
+    # Build an ID from the row position ("index" avoids shadowing the builtin id())
+    sample_id = f"toucan_sample_{index}"
+
+    # Copy the fields of interest, substituting an empty default for any missing key
+    raw_sample = {
+        "id": sample_id,
+        "uuid": sample.get("uuid", ""),
+        "subset_name": sample.get("subset_name", ""),
+        "messages": sample.get("messages", []),
+        "question": sample.get("question", ""),
+        "available_tools": sample.get("available_tools", []),
+        "target_tools": sample.get("target_tools", []),
+        "question_quality_assessment": sample.get("question_quality_assessment", ""),
+        "response_quality_assessment": sample.get("response_quality_assessment", ""),
+        "metadata": sample.get("metadata", {}),
+    }
+
+    print(json.dumps(raw_sample))
diff --git a/datasets/toucan_1_5m/raw_to_standardized.py b/datasets/toucan_1_5m/raw_to_standardized.py
@@ -0,0 +1,104 @@
+import json
+import sys
+from typing import List, Union
+
+from schema.action.action import Action
+from schema.action.api import ApiAction
+from schema.action.message import MessageAction
+from schema.observation.observation import Observation
+from schema.observation.text import TextObservation
+from schema.trajectory import Trajectory
+
+
+def parse_function_call(function_call_data: dict) -> ApiAction:
+    """Build an ApiAction from a function-call dict with "name" and "arguments" keys."""
+    function_name = function_call_data.get("name", "")
+    arguments = function_call_data.get("arguments", "{}")
+
+    # String arguments are decoded as JSON; undecodable text is kept under "raw_arguments"
+    if isinstance(arguments, str):
+        try:
+            kwargs = json.loads(arguments)
+        except json.JSONDecodeError:
+            kwargs = {"raw_arguments": arguments}
+    else:
+        kwargs = arguments
+
+    # Replace hyphens with underscores (the only normalization applied to the name)
+    python_function_name = function_name.replace("-", "_")
+
+    return ApiAction(function=python_function_name, kwargs=kwargs, description=None)
+
+
+def convert_message(message: dict, message_id: str) -> List[Union[Action, Observation]]:
+    """Convert one chat message into zero or more standardized steps (message_id is unused)."""
+    role = message.get("role", "")
+    content = message.get("content") or ""  # a present-but-null content must not break .strip()
+    function_call = message.get("function_call")
+
+    result = []
+
+    if role == "system":
+        # Non-empty system messages become environment observations; blank ones are dropped
+        if content.strip():
+            result.append(TextObservation(content=content, source="environment", name="system"))
+
+    elif role == "user":
+        result.append(TextObservation(content=content, source="user"))
+
+    elif role == "assistant":
+        # A function call becomes an ApiAction, emitted before any accompanying text
+        if function_call:
+            result.append(parse_function_call(function_call))
+
+        # Non-blank text becomes a MessageAction
+        if content.strip():
+            result.append(MessageAction(content=content, description=None))
+
+    elif role == "function":
+        # Function results are observations from the environment, named after the function
+        result.append(
+            TextObservation(
+                content=content, source="environment", name=message.get("name", "function_result")
+            )
+        )
+
+    return result
+
+
+def convert_trajectory(raw_data: dict) -> Trajectory:
+    """Convert one raw Toucan sample (requires "id"; "messages" optional) into a Trajectory."""
+    trajectory_id = raw_data["id"]
+    messages = raw_data.get("messages", [])
+
+    content = []
+
+    for i, message in enumerate(messages):
+        converted_steps = convert_message(message, f"{trajectory_id}_{i}")
+        content.extend(converted_steps)
+
+    # Make the last step a MessageAction containing "<finish>"; an empty trajectory stays empty
+    if content and not (
+        isinstance(content[-1], MessageAction) and "<finish>" in content[-1].content
+    ):
+        if isinstance(content[-1], MessageAction):
+            # Final step is a plain message: wrap its text in finish tags in place
+            content[-1].content = f"<finish> {content[-1].content} </finish>"
+        else:
+            # Final step is not a message: append a generic finish message
+            content.append(
+                MessageAction(content="<finish> Task completed. </finish>", description=None)
+            )
+
+    return Trajectory(id=trajectory_id, content=content)
+
+
+# Read one JSON sample per stdin line; a line that fails is reported on stderr and skipped
+for line in sys.stdin:
+    try:
+        raw_data = json.loads(line)
+        standardized_trajectory = convert_trajectory(raw_data)
+        print(json.dumps(standardized_trajectory.model_dump()))
+    except Exception as e:
+        print(f"Error processing line: {e}", file=sys.stderr)
+        continue
diff --git a/datasets/toucan_1_5m/requirements.txt b/datasets/toucan_1_5m/requirements.txt
@@ -0,0 +1,2 @@
+datasets
+pydantic
diff --git a/datasets/toucan_1_5m/sample_raw.json b/datasets/toucan_1_5m/sample_raw.json
@@ -0,0 +1,75 @@
+[
+  {
+    "id": "toucan_sample_0",
+    "uuid": "ac0907b0-4f43-52e3-aafe-2d6a6f3e3213",
+    "subset_name": "single-turn-original",
+    "messages": [
+      {
+        "role": "system",
+        "content": "<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"exa-search-web_search_exa\", \"description\": \"Search the web using Exa AI - performs real-time web searches and can scrape content from specific URLs. Supports configurable result counts and returns the content from the most relevant websites.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"query\": {\"type\": \"string\", \"description\": \"Search query\"}, \"numResults\": {\"type\": \"number\", \"description\": \"Number of search results to return (default: 5)\"}}, \"required\": [\"query\"]}}}]<|im_end|>"
+      },
+      {
+        "role": "user",
+        "content": "I'm considering joining a Series B fintech startup called \"StellarPay\" as a technical product manager, but before making any decisions, I need to thoroughly research the company. Can you help me understand their business model and recent developments?"
+      },
+      {
+        "role": "assistant",
+        "content": "I'll help you conduct comprehensive research on StellarPay to inform your decision. Let me search for information about their business model and recent developments."
+      },
+      {
+        "role": "assistant",
+        "content": "",
+        "function_call": {
+          "name": "exa-search-web_search_exa",
+          "arguments": "{\"query\": \"StellarPay fintech startup Series B business model recent developments\", \"numResults\": 5}"
+        }
+      },
+      {
+        "role": "function",
+        "content": "{\"success\": true, \"results\": [{\"title\": \"StellarPay Raises $50M Series B\", \"url\": \"https://example.com/stellarpay-series-b\", \"content\": \"StellarPay, a fintech startup focused on cross-border payments, has raised $50M in Series B funding...\"}]}",
+        "name": "exa-search-web_search_exa"
+      },
+      {
+        "role": "assistant",
+        "content": "Based on my research, StellarPay appears to be a promising fintech startup in the cross-border payments space. They recently raised $50M in Series B funding, which indicates strong investor confidence. Their business model focuses on facilitating international money transfers with lower fees and faster processing times compared to traditional banks."
+      }
+    ],
+    "question": "I'm considering joining a Series B fintech startup called \"StellarPay\" as a technical product manager, but before making any decisions, I need to thoroughly research the company. Can you help me understand their business model and recent developments?",
+    "available_tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "exa-search-web_search_exa",
+          "description": "Search the web using Exa AI - performs real-time web searches and can scrape content from specific URLs. Supports configurable result counts and returns the content from the most relevant websites.",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "query": {
+                "type": "string",
+                "description": "Search query"
+              },
+              "numResults": {
+                "type": "number",
+                "description": "Number of search results to return (default: 5)"
+              }
+            },
+            "required": ["query"]
+          }
+        }
+      }
+    ],
+    "target_tools": ["exa-search-web_search_exa"],
+    "question_quality_assessment": "This is a high-quality question that requires research and analysis. The user has a specific, realistic scenario and needs comprehensive information to make an informed decision.",
+    "response_quality_assessment": "The response demonstrates proper tool usage and provides relevant information based on the search results. The assistant correctly uses the web search tool and synthesizes the findings.",
+    "metadata": {
+      "prompt_id": "00000012",
+      "row_id": 12,
+      "mcp_servers": [
+        {
+          "server_id": 462,
+          "server_name": "exa-search"
+        }
+      ]
+    }
+  }
+]
diff --git a/datasets/toucan_1_5m/sample_sft.json b/datasets/toucan_1_5m/sample_sft.json
@@ -0,0 +1,28 @@
+[
+  {
+    "id": "toucan_sample_0",
+    "conversations": [
+      {
+        "from": "human",
+        "value": "I'm considering joining a Series B fintech startup called \"StellarPay\" as a technical product manager, but before making any decisions, I need to thoroughly research the company. Can you help me understand their business model and recent developments?"
+      },
+      {
+        "from": "gpt",
+        "value": "I'll help you conduct comprehensive research on StellarPay to inform your decision. Let me search for information about their business model and recent developments."
+      },
+      {
+        "from": "function_call",
+        "value": "<function=exa-search-web_search_exa>\n<parameter=query>StellarPay fintech startup Series B business model recent developments</parameter>\n<parameter=numResults>5</parameter>\n</function>"
+      },
+      {
+        "from": "function_result",
+        "value": "{\"success\": true, \"results\": [{\"title\": \"StellarPay Raises $50M Series B\", \"url\": \"https://example.com/stellarpay-series-b\", \"content\": \"StellarPay, a fintech startup focused on cross-border payments, has raised $50M in Series B funding...\"}]}"
+      },
+      {
+        "from": "function_call",
+        "value": "<function=finish>\n<parameter=message>\nBased on my research, StellarPay appears to be a promising fintech startup in the cross-border payments space. They recently raised $50M in Series B funding, which indicates strong investor confidence. Their business model focuses on facilitating international money transfers with lower fees and faster processing times compared to traditional banks.\n</parameter>\n<parameter=task_completed>\ntrue\n</parameter>\n</function>"
+      }
+    ],
+    "system": "You are a helpful AI assistant that can use various tools to help users with their requests. You have access to web search capabilities and other tools to gather information and complete tasks."
+  }
+]
diff --git a/datasets/toucan_1_5m/sample_sft/sample_sft_openhands.json b/datasets/toucan_1_5m/sample_sft/sample_sft_openhands.json
diff --git a/datasets/toucan_1_5m/sample_sft/sample_sft_sweagent.json b/datasets/toucan_1_5m/sample_sft/sample_sft_sweagent.json
@@ -0,0 +1,32 @@
+[
+  {
+    "id": "toucan_sample_0",
+    "conversations": [
+      {
+        "from": "human",
+        "value": "Below is a list of functions you can use in the bash environment. \n\nexa_search_web_search_exa(query: str, numResults: int = 5) -> dict\n    Search the web using Exa AI - performs real-time web searches and can scrape content from specific URLs.\n    \n    Args:\n    ----\n        query (str): Search query\n        numResults (int): Number of search results to return (default: 5)\n    \n    Returns:\n    -------\n        dict: Search results containing success status and results list\n\n<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"exa-search-web_search_exa\", \"description\": \"Search the web using Exa AI - performs real-time web searches and can scrape content from specific URLs. Supports configurable result counts and returns the content from the most relevant websites.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"query\": {\"type\": \"string\", \"description\": \"Search query\"}, \"numResults\": {\"type\": \"number\", \"description\": \"Number of search results to return (default: 5)\"}}, \"required\": [\"query\"]}}}]<|im_end|>"
+      },
+      {
+        "from": "human",
+        "value": "I'm considering joining a Series B fintech startup called \"StellarPay\" as a technical product manager, but before making any decisions, I need to thoroughly research the company. Can you help me understand their business model and recent developments?"
+      },
+      {
+        "from": "gpt",
+        "value": "I'll help you conduct comprehensive research on StellarPay to inform your decision. Let me search for information about their business model and recent developments."
+      },
+      {
+        "from": "gpt",
+        "value": "<function=bash>\n<parameter=command>\nexa_search_web_search_exa(query=StellarPay fintech startup Series B business model recent developments, numResults=5)\n</parameter>\n</function>"
+      },
+      {
+        "from": "human",
+        "value": "OBSERVATION:\n{\"success\": true, \"results\": [{\"title\": \"StellarPay Raises $50M Series B\", \"url\": \"https://example.com/stellarpay-series-b\", \"content\": \"StellarPay, a fintech startup focused on cross-border payments, has raised $50M in Series B funding...\"}]}"
+      },
+      {
+        "from": "gpt",
+        "value": "Based on my research, StellarPay appears to be a promising fintech startup in the cross-border payments space. They recently raised $50M in Series B funding, which indicates strong investor confidence. Their business model focuses on facilitating international money transfers with lower fees and faster processing times compared to traditional banks.<function=submit>\n</function>"
+      }
+    ],
+    "system": "You are a helpful assistant that can interact with a computer to solve tasks.\n<IMPORTANT>\n* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.\n</IMPORTANT>\n\nYou have access to the following functions:\n\n---- BEGIN FUNCTION #1:\nbash ----\nDescription: Execute a bash command in the terminal.\n\nParameters:\n  (1) command (string, required): The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.\n---- END FUNCTION #1 ----\n\n---- BEGIN FUNCTION #2: submit ----\nDescription: Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task.\nNo parameters are required for this function.\n---- END FUNCTION #2 ----\n\n---- BEGIN FUNCTION #3: str_replace_editor ----\nDescription: Custom editing tool for viewing, creating and editing files\n* State is persistent across command calls and discussions with the user\n* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep\n* The `create` command cannot be used if the specified `path` already exists as a file\n* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`\n* The `undo_edit` command will revert the last edit made to the file at `path`\n\nNotes for using the `str_replace` command:\n* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!\n* If the `old_str` parameter is not unique in the file, the replacement will not be performed. 
Make sure to include enough context in `old_str` to make it unique\n* The `new_str` parameter should contain the edited lines that should replace the `old_str`\n\nParameters:\n  (1) command (string, required): The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.\nAllowed values: [`view`, `create`, `str_replace`, `insert`, `undo_edit`]\n  (2) path (string, required): Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.\n  (3) file_text (string, optional): Required parameter of `create` command, with the content of the file to be created.\n  (4) old_str (string, optional): Required parameter of `str_replace` command containing the string in `path` to replace.\n  (5) new_str (string, optional): Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.\n  (6) insert_line (integer, optional): Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.\n  (7) view_range (array, optional): Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. 
Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.\n---- END FUNCTION #3 ----\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\nProvide any reasoning for the function call here.\n<function=example_function_name>\n<parameter=example_parameter_1>value_1</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format, start with <function= and end with </function>\n- Required parameters MUST be specified\n- Only call one function at a time\n- Always provide reasoning for your function call in natural language BEFORE the function call (not after)\n</IMPORTANT>"
+  }
+]