Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 55 additions & 12 deletions interpreter/computer_use/tools/computer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import asyncio
import base64
import math
import os
import platform
import shlex
import shutil
import subprocess
import tempfile
import time
from enum import StrEnum
Expand Down Expand Up @@ -67,6 +65,55 @@ def chunks(s: str, chunk_size: int) -> list[str]:
return [s[i : i + chunk_size] for i in range(0, len(s), chunk_size)]


# Maps every accepted modifier spelling (lower-case) to the modifier name
# that is placed in front of " down" when the AppleScript is built.
MACOS_MODIFIER_ALIASES = {
    "alt": "option",
    "cmd": "command",
    "command": "command",
    "control": "control",
    "ctrl": "control",
    "option": "option",
    "shift": "shift",
    "super": "command",
}


def _escape_applescript_string(value: str) -> str:
return value.replace("\\", "\\\\").replace('"', '\\"')


def _normalize_macos_modifier(modifier: str) -> str | None:
    """Look up ``modifier`` in ``MACOS_MODIFIER_ALIASES``.

    Surrounding whitespace and letter case are ignored. Returns ``None``
    when the cleaned-up name is not a key of the alias table.
    """
    lookup_key = modifier.strip().lower()
    return MACOS_MODIFIER_ALIASES.get(lookup_key)


def _run_macos_hotkey(keystroke: str, modifiers: list[str]) -> bool:
applescript_modifiers = []
for modifier in modifiers:
normalized_modifier = _normalize_macos_modifier(modifier)
if not normalized_modifier:
return False
applescript_modifiers.append(f"{normalized_modifier} down")

if not applescript_modifiers:
return False

escaped_keystroke = _escape_applescript_string(keystroke)
using_clause = applescript_modifiers[0]
if len(applescript_modifiers) > 1:
using_clause = "{" + ", ".join(applescript_modifiers) + "}"

script = f"""
tell application "System Events"
keystroke "{escaped_keystroke}" using {using_clause}
end tell
"""

try:
subprocess.run(["osascript", "-e", script], check=False)
return True
except OSError:
return False


def smooth_move_to(x, y, duration=1.2):
start_x, start_y = pyautogui.position()
dx = x - start_x
Expand Down Expand Up @@ -171,18 +218,14 @@ def normalize_key(key):
if len(keys) > 1:
if "darwin" in platform.system().lower():
# Use AppleScript for hotkey on macOS
keystroke, modifier = (keys[-1], "+".join(keys[:-1]))
modifier = modifier.lower() + " down"
keystroke = keys[-1]
modifiers = keys[:-1]
if keystroke.lower() == "space":
keystroke = " "
elif keystroke.lower() == "enter":
elif keystroke.lower() in {"enter", "return"}:
keystroke = "\n"
script = f"""
tell application "System Events"
keystroke "{keystroke}" using {modifier}
end tell
"""
os.system("osascript -e '{}'".format(script))
if not _run_macos_hotkey(keystroke, modifiers):
pyautogui.hotkey(*keys)
else:
pyautogui.hotkey(*keys)
else:
Expand Down
81 changes: 53 additions & 28 deletions interpreter/core/computer/keyboard/keyboard.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,31 @@
import os
import platform
import subprocess
import time

from ...utils.lazy_import import lazy_import

# Lazy import of pyautogui
pyautogui = lazy_import("pyautogui")

# Maps every accepted modifier spelling (lower-case) to the modifier name
# that is placed in front of " down" when the AppleScript is built.
MACOS_MODIFIER_ALIASES = {
    "alt": "option",
    "cmd": "command",
    "command": "command",
    "control": "control",
    "ctrl": "control",
    "option": "option",
    "shift": "shift",
    "super": "command",
}


def _escape_applescript_string(value):
return value.replace("\\", "\\\\").replace('"', '\\"')


def _normalize_macos_modifier(modifier):
    """Look up ``modifier`` in ``MACOS_MODIFIER_ALIASES``.

    Surrounding whitespace and letter case are ignored. Returns ``None``
    when the cleaned-up name is not a key of the alias table.
    """
    lookup_key = modifier.strip().lower()
    return MACOS_MODIFIER_ALIASES.get(lookup_key)


class Keyboard:
"""A class to simulate keyboard inputs"""
Expand Down Expand Up @@ -81,35 +100,41 @@ def hotkey(self, *args, interval=0.1):
Press a sequence of keys in the order they are provided, and then release them in reverse order.
"""
time.sleep(0.15)
modifiers = ["command", "option", "alt", "ctrl", "shift"]
if "darwin" in platform.system().lower() and len(args) == 2:
# pyautogui.hotkey seems to not work, so we use applescript
# Determine which argument is the keystroke and which is the modifier
keystroke, modifier = (
args if args[0].lower() not in modifiers else args[::-1]
)

modifier = modifier.lower()

# Map the modifier to the one that AppleScript expects
if " down" not in modifier:
modifier = modifier + " down"

if keystroke.lower() == "space":
keystroke = " "

if keystroke.lower() == "enter":
keystroke = "\n"

# Create the AppleScript
script = f"""
tell application "System Events"
keystroke "{keystroke}" using {modifier}
end tell
"""

# Execute the AppleScript
os.system("osascript -e '{}'".format(script))
normalized_args = [str(arg).strip().lower() for arg in args]
modifier = None
keystroke = None

first_modifier = _normalize_macos_modifier(normalized_args[0])
second_modifier = _normalize_macos_modifier(normalized_args[1])

if first_modifier and not second_modifier:
modifier = first_modifier
keystroke = str(args[1])
elif second_modifier and not first_modifier:
modifier = second_modifier
keystroke = str(args[0])

if modifier and keystroke is not None:
if keystroke.lower() == "space":
keystroke = " "
elif keystroke.lower() in {"enter", "return"}:
keystroke = "\n"

escaped_keystroke = _escape_applescript_string(keystroke)
script = f"""
tell application "System Events"
keystroke "{escaped_keystroke}" using {modifier} down
end tell
"""

try:
subprocess.run(["osascript", "-e", script], check=False)
except OSError:
pyautogui.hotkey(*args, interval=interval)
else:
pyautogui.hotkey(*args, interval=interval)
else:
pyautogui.hotkey(*args, interval=interval)
time.sleep(0.15)
Expand Down
78 changes: 78 additions & 0 deletions tests/computer_use/tools/test_computer_hotkey_security.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import importlib
import sys
import unittest
from unittest import mock


def load_computer_module_with_mock_pyautogui():
    """Import the computer tool module afresh with ``pyautogui`` mocked out.

    Returns:
        A ``(module, mock_pyautogui)`` tuple: the newly imported module and
        the ``Mock`` that stood in for ``pyautogui`` during the import.
    """
    target = "interpreter.computer_use.tools.computer"
    # Forget any cached copies so the import below executes the module again.
    for cached_name in (target, "interpreter.computer_use.tools"):
        sys.modules.pop(cached_name, None)

    fake_pyautogui = mock.Mock()
    # Give size() a concrete (width, height) tuple instead of a bare Mock.
    fake_pyautogui.size.return_value = (1920, 1080)

    # The fake is registered in sys.modules only for the duration of the import.
    with mock.patch.dict(sys.modules, {"pyautogui": fake_pyautogui}):
        computer_module = importlib.import_module(target)

    return computer_module, fake_pyautogui


class TestComputerToolHotkeySecurity(unittest.IsolatedAsyncioTestCase):
    """Tests for how the computer tool builds and dispatches macOS hotkeys."""

    def setUp(self):
        loaded = load_computer_module_with_mock_pyautogui()
        self.computer_module, self.mock_pyautogui = loaded

    async def _send_key_on_darwin(self, text, runner_result):
        """Run the tool's ``key`` action with the platform reported as Darwin.

        ``_run_macos_hotkey`` is replaced by a mock returning
        ``runner_result``; that mock is returned for inspection.
        """
        module = self.computer_module
        tool = module.ComputerTool()
        # Swap the screenshot coroutine for a canned result.
        tool.screenshot = mock.AsyncMock(
            return_value=module.ToolResult(base64_image="ok")
        )
        darwin_patch = mock.patch.object(
            module.platform, "system", return_value="Darwin"
        )
        runner_patch = mock.patch.object(
            module, "_run_macos_hotkey", return_value=runner_result
        )
        with darwin_patch, runner_patch as fake_runner:
            await tool(action="key", text=text)
        return fake_runner

    async def test_macos_key_action_uses_hotkey_runner(self):
        fake_runner = await self._send_key_on_darwin("command+a", True)

        fake_runner.assert_called_once_with("a", ["command"])
        self.mock_pyautogui.hotkey.assert_not_called()

    async def test_macos_key_action_falls_back_to_pyautogui_hotkey(self):
        fake_runner = await self._send_key_on_darwin("bad+a", False)

        fake_runner.assert_called_once_with("a", ["bad"])
        self.mock_pyautogui.hotkey.assert_called_once_with("bad", "a")

    def test_run_macos_hotkey_escapes_and_uses_subprocess_list_args(self):
        module = self.computer_module
        with mock.patch.object(module.subprocess, "run") as fake_run:
            outcome = module._run_macos_hotkey('a"b\\c', ["command"])

        self.assertTrue(outcome)
        fake_run.assert_called_once()
        argv = fake_run.call_args[0][0]
        # A list (not a shell string) means no shell parses the script text.
        self.assertIsInstance(argv, list)
        self.assertEqual(argv[0], "osascript")
        self.assertEqual(argv[1], "-e")
        self.assertIn('keystroke "a\\"b\\\\c" using command down', argv[2])

    def test_run_macos_hotkey_rejects_invalid_modifier(self):
        module = self.computer_module
        with mock.patch.object(module.subprocess, "run") as fake_run:
            outcome = module._run_macos_hotkey("a", ["not_a_modifier"])

        self.assertFalse(outcome)
        fake_run.assert_not_called()


if __name__ == "__main__":
unittest.main()
90 changes: 90 additions & 0 deletions tests/core/computer/test_keyboard_hotkey_security.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import unittest
from unittest import mock

from interpreter.core.computer.keyboard.keyboard import Keyboard


class TestKeyboardHotkeySecurity(unittest.TestCase):
    """Tests for ``Keyboard.hotkey`` and its macOS AppleScript path."""

    def setUp(self):
        self.keyboard = Keyboard(mock.Mock())

    def _hotkey_on(self, system_name, *keys, **hotkey_kwargs):
        """Call ``hotkey`` with the platform reported as ``system_name``.

        ``time.sleep``, ``subprocess.run`` and ``pyautogui`` are mocked for
        the duration of the call. Returns ``(run_mock, pyautogui_mock)``.
        """
        target = "interpreter.core.computer.keyboard.keyboard"
        fake_pyautogui = mock.Mock()
        with mock.patch(f"{target}.time.sleep"), mock.patch(
            f"{target}.platform.system", return_value=system_name
        ), mock.patch(f"{target}.subprocess.run") as fake_run, mock.patch(
            f"{target}.pyautogui", fake_pyautogui
        ):
            self.keyboard.hotkey(*keys, **hotkey_kwargs)
        return fake_run, fake_pyautogui

    def test_macos_hotkey_uses_subprocess_argument_list(self):
        fake_run, fake_pyautogui = self._hotkey_on("Darwin", "a", "command")

        fake_run.assert_called_once()
        argv = fake_run.call_args[0][0]
        # A list (not a shell string) means no shell parses the script text.
        self.assertIsInstance(argv, list)
        self.assertEqual(argv[0], "osascript")
        self.assertEqual(argv[1], "-e")
        self.assertIn('keystroke "a" using command down', argv[2])
        fake_pyautogui.hotkey.assert_not_called()

    def test_macos_hotkey_escapes_applescript_keystroke_content(self):
        payload = 'a"b\\c'

        fake_run, fake_pyautogui = self._hotkey_on("Darwin", payload, "command")

        argv = fake_run.call_args[0][0]
        self.assertIn('keystroke "a\\"b\\\\c" using command down', argv[2])
        fake_pyautogui.hotkey.assert_not_called()

    def test_invalid_modifier_falls_back_to_pyautogui(self):
        fake_run, fake_pyautogui = self._hotkey_on(
            "Darwin", "a", "not_a_modifier"
        )

        fake_run.assert_not_called()
        fake_pyautogui.hotkey.assert_called_once_with(
            "a", "not_a_modifier", interval=0.1
        )

    def test_non_macos_uses_pyautogui_hotkey(self):
        fake_run, fake_pyautogui = self._hotkey_on(
            "Linux", "ctrl", "x", interval=0.2
        )

        fake_run.assert_not_called()
        fake_pyautogui.hotkey.assert_called_once_with("ctrl", "x", interval=0.2)


if __name__ == "__main__":
unittest.main()