Code exec: selectable shell interface (local TTY / SSH) + Whisper/Kokoro preload disabled, model-load notifications added

frdel · frdel · commit 66e01e68e321 · 2025-08-12T20:04:38.000+02:00
diff --git a/preload.py b/preload.py
@@ -40,8 +40,8 @@ async def preload_kokoro():
         # async tasks to preload
         tasks = [
             preload_embedding(),
-            preload_whisper(),
-            preload_kokoro()
+            # preload_whisper(),
+            # preload_kokoro()
         ]
 
         await asyncio.gather(*tasks, return_exceptions=True)
diff --git a/python/api/synthesize.py b/python/api/synthesize.py
@@ -7,11 +7,11 @@
 class Synthesize(ApiHandler):
     async def process(self, input: dict, request: Request) -> dict | Response:
         text = input.get("text", "")
-        ctxid = input.get("ctxid", "")
+        # ctxid = input.get("ctxid", "")
         
-        context = self.get_context(ctxid)
-        if not await kokoro_tts.is_downloaded():
-            context.log.log(type="info", content="Kokoro TTS model is currently being initialized, please wait...")
+        # context = self.get_context(ctxid)
+        # if not await kokoro_tts.is_downloaded():
+        #     context.log.log(type="info", content="Kokoro TTS model is currently being initialized, please wait...")
 
         try:
             # # Clean and chunk text for long responses
diff --git a/python/api/transcribe.py b/python/api/transcribe.py
@@ -5,11 +5,11 @@
 class Transcribe(ApiHandler):
     async def process(self, input: dict, request: Request) -> dict | Response:
         audio = input.get("audio")
-        ctxid = input.get("ctxid", "")
+        # ctxid = input.get("ctxid", "")
 
-        context = self.get_context(ctxid)
-        if not await whisper.is_downloaded():
-            context.log.log(type="info", content="Whisper STT model is currently being initialized, please wait...")
+        # context = self.get_context(ctxid)
+        # if not await whisper.is_downloaded():
+        #     context.log.log(type="info", content="Whisper STT model is currently being initialized, please wait...")
 
         set = settings.get_settings()
         result = await whisper.transcribe(set["stt_model_size"], audio) # type: ignore
diff --git a/python/helpers/fasta2a_server.py b/python/helpers/fasta2a_server.py
@@ -258,7 +258,7 @@ def _configure(self):
             # Atomic update of the app
             self.app = new_app
 
-            _PRINTER.print("[A2A] FastA2A server configured successfully")
+            # _PRINTER.print("[A2A] FastA2A server configured successfully")
 
         except Exception as e:
             _PRINTER.print(f"[A2A] Failed to configure FastA2A server: {e}")
diff --git a/python/helpers/kokoro_tts.py b/python/helpers/kokoro_tts.py
@@ -7,6 +7,7 @@
 import soundfile as sf
 from python.helpers import runtime
 from python.helpers.print_style import PrintStyle
+from python.helpers.notification import NotificationManager, NotificationType, NotificationPriority
 
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", category=UserWarning)
@@ -38,9 +39,21 @@ async def _preload():
     try:
         is_updating_model = True
         if not _pipeline:
+            NotificationManager.send_notification(
+                NotificationType.INFO,
+                NotificationPriority.NORMAL,
+                "Loading Kokoro TTS model...",
+                display_time=99,
+                group="kokoro-preload")
             PrintStyle.standard("Loading Kokoro TTS model...")
             from kokoro import KPipeline
             _pipeline = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")
+            NotificationManager.send_notification(
+                NotificationType.INFO,
+                NotificationPriority.NORMAL,
+                "Kokoro TTS model loaded.",
+                display_time=2,
+                group="kokoro-preload")
     finally:
         is_updating_model = False
 
diff --git a/python/helpers/settings.py b/python/helpers/settings.py
@@ -84,6 +84,8 @@ class Settings(TypedDict):
     rfc_port_http: int
     rfc_port_ssh: int
 
+    shell_interface: Literal['local','ssh']
+
     stt_model_size: str
     stt_language: str
     stt_silence_threshold: float
@@ -793,6 +795,17 @@ def convert_out(settings: Settings) -> SettingsOutput:
 
     dev_fields: list[SettingsField] = []
 
+    dev_fields.append(
+        {
+            "id": "shell_interface",
+            "title": "Shell Interface",
+            "description": "Terminal interface used for Code Execution Tool. Local Python TTY works locally in both dockerized and development environments. SSH always connects to dockerized environment (automatically at localhost or RFC host address).",
+            "type": "select",
+            "value": settings["shell_interface"],
+            "options": [{"value": "local", "label": "Local Python TTY"}, {"value": "ssh", "label": "SSH"}],
+        }
+    )
+
     if runtime.is_development():
         # dev_fields.append(
         #     {
@@ -1378,6 +1391,7 @@ def get_default_settings() -> Settings:
         rfc_password="",
         rfc_port_http=55080,
         rfc_port_ssh=55022,
+        shell_interface="local" if runtime.is_dockerized() else "ssh",
         stt_model_size="base",
         stt_language="en",
         stt_silence_threshold=0.3,
@@ -1539,7 +1553,7 @@ def set_root_password(password: str):
 def get_runtime_config(set: Settings):
     if runtime.is_dockerized():
         return {
-            "code_exec_ssh_enabled": False,
+            "code_exec_ssh_enabled": set["shell_interface"] == "ssh",
             "code_exec_ssh_addr": "localhost",
             "code_exec_ssh_port": 22,
             "code_exec_ssh_user": "root",
@@ -1553,7 +1567,7 @@ def get_runtime_config(set: Settings):
         if host.endswith("/"):
             host = host[:-1]
         return {
-            "code_exec_ssh_enabled": True,
+            "code_exec_ssh_enabled": set["shell_interface"] == "ssh",
             "code_exec_ssh_addr": host,
             "code_exec_ssh_port": set["rfc_port_ssh"],
             "code_exec_ssh_user": "root",
diff --git a/python/helpers/whisper.py b/python/helpers/whisper.py
@@ -5,6 +5,7 @@
 import asyncio
 from python.helpers import runtime, rfc, settings, files
 from python.helpers.print_style import PrintStyle
+from python.helpers.notification import NotificationManager, NotificationType, NotificationPriority
 
 # Suppress FutureWarning from torch.load
 warnings.filterwarnings("ignore", category=FutureWarning)
@@ -30,9 +31,21 @@ async def _preload(model_name:str):
     try:
         is_updating_model = True
         if not _model or _model_name != model_name:
-                PrintStyle.standard(f"Loading Whisper model: {model_name}")
-                _model = whisper.load_model(name=model_name, download_root=files.get_abs_path("/tmp/models/whisper")) # type: ignore
-                _model_name = model_name
+            NotificationManager.send_notification(
+                NotificationType.INFO,
+                NotificationPriority.NORMAL,
+                "Loading Whisper model...",
+                display_time=99,
+                group="whisper-preload")
+            PrintStyle.standard(f"Loading Whisper model: {model_name}")
+            _model = whisper.load_model(name=model_name, download_root=files.get_abs_path("/tmp/models/whisper")) # type: ignore
+            _model_name = model_name
+            NotificationManager.send_notification(
+                NotificationType.INFO,
+                NotificationPriority.NORMAL,
+                "Whisper model loaded.",
+                display_time=2,
+                group="whisper-preload")
     finally:
         is_updating_model = False
 
diff --git a/python/tools/code_execution_tool.py b/python/tools/code_execution_tool.py
@@ -15,6 +15,7 @@
 
 @dataclass
 class State:
+    ssh_enabled: bool
     shells: dict[int, LocalInteractiveSession | SSHInteractiveSession]
 
 
@@ -77,7 +78,8 @@ async def after_execution(self, response, **kwargs):
 
     async def prepare_state(self, reset=False, session: int | None = None):
         self.state: State | None = self.agent.get_data("_cet_state")
-        if not self.state:
+        # always reset state when ssh_enabled changes
+        if not self.state or self.state.ssh_enabled != self.agent.config.code_exec_ssh_enabled:
             # initialize shells dictionary if not exists
             shells: dict[int, LocalInteractiveSession | SSHInteractiveSession] = {}
         else:
@@ -114,7 +116,7 @@ async def prepare_state(self, reset=False, session: int | None = None):
             shells[session] = shell
             await shell.connect()
 
-        self.state = State(shells=shells)
+        self.state = State(shells=shells, ssh_enabled=self.agent.config.code_exec_ssh_enabled)
         self.agent.set_data("_cet_state", self.state)
         return self.state
 
@@ -201,9 +203,10 @@ async def get_terminal_output(
 
         # Common shell prompt regex patterns (add more as needed)
         prompt_patterns = [
-            re.compile(r"\\(venv\\).+[$#] ?$"),  # (venv) ...$ or (venv) ...#
+            re.compile(r"\(venv\).+[$#] ?$"),  # (venv) ...$ or (venv) ...#
             re.compile(r"root@[^:]+:[^#]+# ?$"),  # root@container:~#
             re.compile(r"[a-zA-Z0-9_.-]+@[^:]+:[^$#]+[$#] ?$"),  # user@host:~$
+            re.compile(r"bash-\d+\.\d+\$ ?$"),  # bash-3.2$ (version can vary)
         ]
 
         # potential dialog detection