From 175254094d58c6ddf394965aff98ade9cadb01cb Mon Sep 17 00:00:00 2001 From: David Chincharashvili Date: Wed, 3 Jun 2026 10:13:40 +0400 Subject: [PATCH 1/3] feat: friendliness quick wins for CLI first-run and Windows setup Eliminate silent failures and make setup self-diagnosing: - play_audio() returns bool; callers report when no player opened the file instead of leaving the user in silence. - --stream with no media player now prints an actionable warning. - --check-deps (new --doctor alias) shows per-platform install commands for missing deps and an "All set" closer on success. - No-args run prints a friendly welcome with common commands and points to --doctor / --help-full; one-time first-run nudge via marker file. - New extras/windows/Install-TTS_ka-Windows.ps1 orchestrator verifies prerequisites (TTS_ka on PATH, AutoHotkey v2) and runs both installers with a clear summary; readme gains a one-step setup section. Tests: 677 pass; added coverage for play_audio return values, DepRow.fix population, and report rendering. Co-Authored-By: Claude Opus 4.8 (1M context) --- extras/windows/Install-TTS_ka-Windows.ps1 | 158 ++++++++++++++++++++++ readme.md | 12 ++ src/TTS_ka/deps.py | 39 +++++- src/TTS_ka/fast_audio.py | 17 ++- src/TTS_ka/main.py | 47 ++++++- src/TTS_ka/ultra_fast.py | 4 +- src/TTS_ka/user_config.py | 23 ++++ tests/test_deps.py | 34 ++++- tests/test_fast_audio.py | 14 +- tests/test_json_output.py | 2 +- tests/test_main.py | 5 +- tests/test_ultra_fast.py | 2 +- 12 files changed, 333 insertions(+), 24 deletions(-) create mode 100644 extras/windows/Install-TTS_ka-Windows.ps1 diff --git a/extras/windows/Install-TTS_ka-Windows.ps1 b/extras/windows/Install-TTS_ka-Windows.ps1 new file mode 100644 index 0000000..06ecf76 --- /dev/null +++ b/extras/windows/Install-TTS_ka-Windows.ps1 @@ -0,0 +1,158 @@ +<# +.SYNOPSIS + One-step Windows setup for TTS_ka: context menu + global hotkeys, with + prerequisite checks so you know what (if anything) is missing. 
+ +.DESCRIPTION + This is a thin orchestrator over the two existing installers: + - extras\windows\context_menu\Install-TTS_ka-ContextMenu.ps1 (right-click "Read with TTS_ka") + - extras\autohotkey\Install-TTS_ka-Hotkeys.ps1 (Alt+E/R/X global hotkeys) + + It first verifies that TTS_ka is runnable, then checks for AutoHotkey v2 + (needed only for the hotkeys + in-app selection menu), then runs both + installers (the hotkeys installer is skipped when AutoHotkey v2 is not found) and prints a "what you can do now" summary. + + Run from the repository root: + powershell -ExecutionPolicy Bypass -File .\extras\windows\Install-TTS_ka-Windows.ps1 + +.PARAMETER PythonPath + Path to python.exe, or "python" / "py" if on PATH. Passed to the context-menu installer. + +.PARAMETER Languages + Languages for the context menu (default: en, ru, ka, ka-m). Passed through. + +.PARAMETER SkipHotkeys + Install only the context menu; do not touch the AutoHotkey startup script. + +.PARAMETER SkipContextMenu + Install only the hotkeys; do not register the right-click menu. + +.PARAMETER Uninstall + Remove both the context menu and the hotkeys startup script. + +.PARAMETER WhatIf + Print actions only; do not modify the registry or Startup folder. +#> +param( + [string] $PythonPath = "", + [string[]] $Languages = @(), + [switch] $SkipHotkeys, + [switch] $SkipContextMenu, + [switch] $Uninstall, + [switch] $WhatIf +) + +$ErrorActionPreference = "Stop" + +$RepoRoot = Resolve-Path (Join-Path $PSScriptRoot "..\..") +$ContextPs1 = Join-Path $PSScriptRoot "context_menu\Install-TTS_ka-ContextMenu.ps1" +$HotkeysPs1 = Join-Path $RepoRoot "extras\autohotkey\Install-TTS_ka-Hotkeys.ps1" + +foreach ($p in @($ContextPs1, $HotkeysPs1)) { + if (-not (Test-Path -LiteralPath $p)) { + Write-Error "Missing installer: $p (run this from the TTS_ka repo)." + } +} + +function Test-TTSka { + # True if a `TTS_ka` command is on PATH, or `import TTS_ka` succeeds under `python` / `py`.
+ if (Get-Command "TTS_ka" -ErrorAction SilentlyContinue) { return $true } + foreach ($py in @("python", "py")) { + $c = Get-Command $py -ErrorAction SilentlyContinue + if (-not $c) { continue } + try { + & $c.Source -c "import TTS_ka" 2>$null + if ($LASTEXITCODE -eq 0) { return $true } + } catch { } + } + return $false +} + +function Test-AutoHotkeyV2 { + $names = @("AutoHotkey64.exe", "AutoHotkey32.exe") + $roots = @( + "${env:ProgramFiles}\AutoHotkey\v2", + "${env:ProgramFiles(x86)}\AutoHotkey\v2", + "${env:LocalAppData}\Programs\AutoHotkey\v2" + ) + foreach ($r in $roots) { + foreach ($n in $names) { + if (Test-Path -LiteralPath (Join-Path $r $n)) { return $true } + } + } + return [bool](Get-Command "AutoHotkey64.exe" -ErrorAction SilentlyContinue) +} + +Write-Host "TTS_ka Windows setup" +Write-Host ("=" * 40) + +# --- Uninstall path ------------------------------------------------------- +if ($Uninstall) { + if (-not $SkipContextMenu) { + & $ContextPs1 -Uninstall -WhatIf:$WhatIf + } + if (-not $SkipHotkeys) { + & $HotkeysPs1 -Uninstall -WhatIf:$WhatIf + } + Write-Host "" + Write-Host "Uninstall complete. (AutoHotkey itself is left installed.)" + exit 0 +} + +# --- Prerequisite: TTS_ka must be runnable -------------------------------- +if (-not (Test-TTSka)) { + Write-Warning "TTS_ka does not appear to be installed / on PATH." + Write-Host " Install it first, then re-run this script:" + Write-Host " pip install TTS_ka (add [hotkeys] for global hotkeys)" + Write-Host " Verify with: TTS_ka --version (or: python -m TTS_ka --version)" + exit 1 +} +Write-Host "[ok] TTS_ka is runnable." + +# --- Prerequisite: AutoHotkey v2 (hotkeys only) --------------------------- +$haveAhk = Test-AutoHotkeyV2 +if (-not $SkipHotkeys) { + if ($haveAhk) { + Write-Host "[ok] AutoHotkey v2 found." + } else { + Write-Warning "AutoHotkey v2 not found - hotkeys need it. 
Install with:" + Write-Host " winget install AutoHotkey.AutoHotkey" + Write-Host " (The right-click context menu works without AutoHotkey.)" + } +} + +# --- Install context menu ------------------------------------------------- +if (-not $SkipContextMenu) { + Write-Host "" + Write-Host "-> Registering context menu..." + $ctxArgs = @{ WhatIf = [bool]$WhatIf } + if ($PythonPath) { $ctxArgs["PythonPath"] = $PythonPath } + if ($Languages -and $Languages.Count -gt 0) { $ctxArgs["Languages"] = $Languages } + & $ContextPs1 @ctxArgs +} + +# --- Install hotkeys (only if AHK present) -------------------------------- +if (-not $SkipHotkeys -and $haveAhk) { + Write-Host "" + Write-Host "-> Installing global hotkeys..." + & $HotkeysPs1 -WhatIf:$WhatIf +} + +# --- Summary -------------------------------------------------------------- +Write-Host "" +Write-Host "What you can do now" +Write-Host ("-" * 40) +if (-not $SkipContextMenu) { + Write-Host " * Copy text (Ctrl+C), right-click empty space in Explorer/Desktop" + Write-Host " -> 'Read with TTS_ka' -> pick a language." +} +if (-not $SkipHotkeys -and $haveAhk) { + Write-Host " * Global hotkeys: Alt+E (English), Alt+R (Russian), Alt+X (Georgian)." + Write-Host " Apps key (or Ctrl+Alt+Right-click) opens a language menu anywhere." +} +Write-Host "" +Write-Host " Note: Windows cannot add items to the text-selection menu inside Chrome," +Write-Host " Edge, or Word. Use the AutoHotkey Apps-key menu there instead." +Write-Host "" +Write-Host " Verify your audio setup: TTS_ka --doctor" +Write-Host " Uninstall everything: ...\Install-TTS_ka-Windows.ps1 -Uninstall" diff --git a/readme.md b/readme.md index f8de2c8..1d4e427 100644 --- a/readme.md +++ b/readme.md @@ -285,6 +285,18 @@ Debian/Ubuntu may need Tk: `sudo apt install python3-tk`. 
## Windows extras +### One-step setup (recommended) + +```powershell +powershell -ExecutionPolicy Bypass -File .\extras\windows\Install-TTS_ka-Windows.ps1 +``` + +Checks that TTS_ka is runnable and that AutoHotkey v2 is present (printing +`winget install AutoHotkey.AutoHotkey` if not), then installs the Explorer/Desktop +context menu and the global hotkeys (hotkeys only when AutoHotkey v2 is found), and prints a short "what you can do now" summary. +Flags: `-SkipHotkeys`, `-SkipContextMenu`, `-PythonPath`, `-Languages`, `-Uninstall`, `-WhatIf`. The +individual installers below still work if you want finer control. + ### Native global hotkeys (no AutoHotkey) ```bash diff --git a/src/TTS_ka/deps.py b/src/TTS_ka/deps.py index 4f131a3..b7c3d1b 100644 --- a/src/TTS_ka/deps.py +++ b/src/TTS_ka/deps.py @@ -15,6 +15,29 @@ class DepRow: name: str ok: bool detail: str + fix: str = "" + + +def _platform_fix(win: str, mac: str, linux: str) -> str: + """Return the install command for the current platform.""" + if sys.platform.startswith("win"): + return win + if sys.platform == "darwin": + return mac + return linux + + +_FFMPEG_FIX = _platform_fix( + win="winget install Gyan.FFmpeg (or: choco install ffmpeg / scoop install ffmpeg)", + mac="brew install ffmpeg", + linux="sudo apt install ffmpeg (or your distro's package manager)", +) + +_PLAYER_FIX = _platform_fix( + win="winget install mpv (or install VLC)", + mac="brew install mpv", + linux="sudo apt install mpv (or vlc)", +) def _check_module(spec: str, import_name: str) -> DepRow: @@ -34,7 +57,8 @@ def check_ffmpeg() -> DepRow: return DepRow( "ffmpeg", False, - "not on PATH - install ffmpeg (required for merging chunks / pydub MP3)", + "not on PATH - required for merging chunks / pydub MP3", + fix=_FFMPEG_FIX, ) try: r = subprocess.run( @@ -61,6 +85,7 @@ def check_streaming_player() -> DepRow: "streaming player", False, "none of vlc, mpv, ffplay, mplayer found - optional unless you use --stream", + fix=_PLAYER_FIX, ) @@ -69,7 +94,8 @@ def check_soundfile() -> DepRow: return
DepRow( "soundfile", False, - "optional pip install soundfile - faster merges when available", + "optional - faster merges when available", + fix="pip install soundfile", ) return _check_module("soundfile", "soundfile") @@ -81,7 +107,8 @@ def check_uvloop() -> DepRow: return DepRow( "uvloop", False, - "optional - pip install uvloop for faster asyncio on Linux/macOS", + "optional - faster asyncio on Linux/macOS", + fix="pip install uvloop", ) return _check_module("uvloop", "uvloop") @@ -113,9 +140,15 @@ def format_dep_report(rows: Optional[List[DepRow]] = None) -> str: else: flag = "!!" lines.append(f" [{flag}] {r.name.ljust(w)} {r.detail}") + if not r.ok and r.fix: + lines.append(f" {' ' * (len(flag) + 4)}{' ' * w} fix: {r.fix}") lines.append("") lines.append("ffmpeg: required for long/chunked output and reliable MP3 handling.") lines.append("streaming player: needed only for --stream (live playback while generating).") + critical = ("edge-tts", "pydub", "ffmpeg") + if all(r.ok for r in rows if r.name in critical): + lines.append("") + lines.append('All set — try: tts-ka "Hello world" -l en') return "\n".join(lines) diff --git a/src/TTS_ka/fast_audio.py b/src/TTS_ka/fast_audio.py index 344db1e..1618518 100644 --- a/src/TTS_ka/fast_audio.py +++ b/src/TTS_ka/fast_audio.py @@ -540,21 +540,26 @@ def _spawn_detached(argv: List[str]) -> bool: return False -def play_audio(file_path: str) -> None: - """Play an audio file using a platform-appropriate command (no shell).""" +def play_audio(file_path: str) -> bool: + """Play an audio file using a platform-appropriate command (no shell). + + Returns ``True`` when a player was launched, ``False`` when no player + could open the file so callers can tell the user instead of leaving them + in silence. 
+ """ try: abs_path = os.path.abspath(file_path) if sys.platform.startswith("win"): os.startfile(abs_path) - return + return True if sys.platform == "darwin": - _spawn_detached(["open", abs_path]) - return + return _spawn_detached(["open", abs_path]) for player in ("mpv", "vlc", "xdg-open"): if shutil.which(player) and _spawn_detached([player, abs_path]): - return + return True except OSError: pass + return False async def cleanup_http() -> None: diff --git a/src/TTS_ka/main.py b/src/TTS_ka/main.py index 5af0baf..9380f01 100644 --- a/src/TTS_ka/main.py +++ b/src/TTS_ka/main.py @@ -23,6 +23,37 @@ def _cprint(msg: str) -> None: print(_re.sub(r"\[/?[^\]]*\]", "", msg), file=sys.stderr) +def _warn_no_player(file_path: str) -> None: + """Tell the user where the audio is when no player opened it automatically.""" + abs_path = os.path.abspath(file_path) + print( + f"Saved {abs_path}. No audio player opened it automatically — open it manually.", + file=sys.stderr, + ) + if not sys.platform.startswith("win") and sys.platform != "darwin": + print("Install mpv or vlc to enable auto-play.", file=sys.stderr) + + +def _print_welcome() -> None: + """Friendly no-args banner: the common example plus where to go next.""" + if is_first_run(): + print("Looks like your first run — verify your setup with: tts-ka --doctor") + print() + mark_first_run_done() + print("TTS_ka — text to speech for Georgian, Russian, and English.") + print() + print("Languages: en (English) ru (Russian) ka / ka-m (Georgian female/male)") + print() + print("Try:") + print(' tts-ka "Hello world" -l en Speak some text') + print(" tts-ka cb Speak whatever you copied (clipboard)") + print(" tts-ka file.txt -l ru Speak a file") + print() + print("Next:") + print(" tts-ka --doctor Verify ffmpeg / players / Python deps") + print(" tts-ka --help-full All options and examples") + + from .fast_audio import ( fast_generate_audio, play_audio, @@ -37,7 +68,9 @@ def _cprint(msg: str) -> None: apply_env_from_config, 
argparse_defaults_from_config, default_config_path, + is_first_run, load_user_config, + mark_first_run_done, resolved_playback_flags, ) from . import voices as _voices @@ -295,8 +328,10 @@ def main() -> None: ) parser.add_argument( "--check-deps", + "--doctor", action="store_true", - help="Print ffmpeg, streaming player, and Python dependency status; exit 1 if critical deps missing.", + dest="check_deps", + help="Diagnose setup: ffmpeg, streaming player, and Python deps with fix commands; exit 1 if critical deps missing. (--doctor is an alias)", ) parser.add_argument( @@ -525,7 +560,8 @@ def main() -> None: preview_path = tmp.name try: asyncio.run(_run_preview(phrase, voice.id, preview_path)) - play_audio(preview_path) + if not play_audio(preview_path): + _warn_no_player(preview_path) finally: try: os.remove(preview_path) @@ -589,9 +625,7 @@ def main() -> None: if not args.text or not args.text.strip(): if not args.json: - show_simple_help() - print("Error: No text provided") - print("Try: python -m TTS_ka 'your text' --lang en") + _print_welcome() else: emit({"event": "error", "message": "no text provided"}) return @@ -708,7 +742,8 @@ async def run_generation() -> None: raise SystemExit(2) if not no_play and not stream: - play_audio(output_path) + if not play_audio(output_path): + _warn_no_player(output_path) emit({ "event": "done", "output": output_path, diff --git a/src/TTS_ka/ultra_fast.py b/src/TTS_ka/ultra_fast.py index 8e5dd45..b889ee3 100644 --- a/src/TTS_ka/ultra_fast.py +++ b/src/TTS_ka/ultra_fast.py @@ -318,7 +318,9 @@ async def smart_generate_long_text( if detected is None: print( - f"Warning: no audio player found; audio will be saved to {output_path} but not streamed.", + "--stream needs a media player (vlc/mpv/ffplay); none found — " + f"generating without live playback. Audio will be saved to {output_path}. 
" + "Install one (run `tts-ka --doctor` for the command) to enable --stream.", file=sys.stderr, ) show_gui = False diff --git a/src/TTS_ka/user_config.py b/src/TTS_ka/user_config.py index 653e84c..b23142e 100644 --- a/src/TTS_ka/user_config.py +++ b/src/TTS_ka/user_config.py @@ -44,6 +44,29 @@ def default_config_path() -> Path: return Path.home() / ".tts_config.json" +def first_run_marker_path() -> Path: + """Marker written after the first run so the one-time nudge shows only once.""" + return Path.home() / ".tts_ka_first_run" + + +def is_first_run() -> bool: + """True when there is no config file and the first-run marker is absent.""" + try: + if default_config_path().is_file() or first_run_marker_path().exists(): + return False + except OSError: + return False + return True + + +def mark_first_run_done() -> None: + """Write the first-run marker; never raise if the home dir is not writable.""" + try: + first_run_marker_path().write_text("ok\n", encoding="utf-8") + except OSError: + pass + + def resolve_config_path(explicit: str | None) -> Path | None: """Pick config path: CLI ``--config``, then ``TTS_KA_CONFIG``, then default if it exists.""" if explicit and explicit.strip(): diff --git a/tests/test_deps.py b/tests/test_deps.py index eb278c7..b08f556 100644 --- a/tests/test_deps.py +++ b/tests/test_deps.py @@ -4,7 +4,12 @@ from unittest.mock import patch -from TTS_ka.deps import DepRow, format_dep_report, run_dependency_check +from TTS_ka.deps import ( + DepRow, + check_ffmpeg, + format_dep_report, + run_dependency_check, +) def test_format_dep_report_smoke() -> None: @@ -18,6 +23,33 @@ def test_format_dep_report_smoke() -> None: assert "[OK]" in text +def test_format_dep_report_shows_fix_for_failing_row() -> None: + rows = [ + DepRow("edge-tts", True, "ok"), + DepRow("pydub", True, "ok"), + DepRow("ffmpeg", False, "not on PATH", fix="winget install Gyan.FFmpeg"), + ] + text = format_dep_report(rows) + assert "fix: winget install Gyan.FFmpeg" in text + + +def 
test_format_dep_report_success_closer() -> None: + rows = [ + DepRow("edge-tts", True, "ok"), + DepRow("pydub", True, "ok"), + DepRow("ffmpeg", True, "ok"), + ] + text = format_dep_report(rows) + assert "All set" in text + + +def test_check_ffmpeg_missing_populates_fix() -> None: + with patch("TTS_ka.deps.shutil.which", return_value=None): + row = check_ffmpeg() + assert not row.ok + assert row.fix # a platform-specific install command is present + + @patch("TTS_ka.deps.collect_dep_rows") def test_run_dependency_check_success(mock_collect) -> None: mock_collect.return_value = [ diff --git a/tests/test_fast_audio.py b/tests/test_fast_audio.py index f9edb0c..9b18bf6 100644 --- a/tests/test_fast_audio.py +++ b/tests/test_fast_audio.py @@ -277,7 +277,7 @@ def test_windows(self, temp_dir): with open(f, "wb") as fp: fp.write(b"x") with patch('sys.platform', 'win32'), patch('os.startfile', create=True) as m: - play_audio(f) + assert play_audio(f) is True m.assert_called_once() def test_mac(self, temp_dir): @@ -298,14 +298,22 @@ def test_linux(self, temp_dir): with patch('sys.platform', 'linux'), \ patch('TTS_ka.fast_audio.shutil.which', return_value='/usr/bin/mpv'), \ patch('TTS_ka.fast_audio.subprocess.Popen'): - play_audio(f) # must not raise + assert play_audio(f) is True # must not raise def test_oserror_silenced(self, temp_dir): f = os.path.join(temp_dir, "t.mp3") with open(f, "wb") as fp: fp.write(b"x") with patch('sys.platform', 'win32'), patch('os.startfile', side_effect=OSError, create=True): - play_audio(f) # must not raise + assert play_audio(f) is False # must not raise; reports failure + + def test_linux_no_player_returns_false(self, temp_dir): + f = os.path.join(temp_dir, "t.mp3") + with open(f, "wb") as fp: + fp.write(b"x") + with patch('sys.platform', 'linux'), \ + patch('TTS_ka.fast_audio.shutil.which', return_value=None): + assert play_audio(f) is False # --------------------------------------------------------------------------- diff --git 
a/tests/test_json_output.py b/tests/test_json_output.py index 4820338..3eb5425 100644 --- a/tests/test_json_output.py +++ b/tests/test_json_output.py @@ -55,7 +55,7 @@ def test_tty_stdin_no_text_arg_shows_help(self, capsys): from TTS_ka.main import main main() mfa.assert_not_called() - assert "No text provided" in capsys.readouterr().out + assert "--doctor" in capsys.readouterr().out class TestJSONMode: diff --git a/tests/test_main.py b/tests/test_main.py index 55cbc9b..b15493c 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -97,12 +97,13 @@ def test_version_short_flag(self, capsys): assert "TTS_ka" in capsys.readouterr().out def test_no_text_shows_help(self, capsys): - """main() with no text argument prints help and returns.""" + """main() with no text argument prints the friendly welcome and returns.""" with patch('sys.argv', ['TTS_ka']): from TTS_ka.main import main main() out = capsys.readouterr().out - assert "Error: No text provided" in out + assert "--doctor" in out + assert "--help-full" in out def test_help_full_calls_show_simple_help(self, capsys): """--help-full calls show_simple_help and show_troubleshooting.""" diff --git a/tests/test_ultra_fast.py b/tests/test_ultra_fast.py index f014429..42c3c8f 100644 --- a/tests/test_ultra_fast.py +++ b/tests/test_ultra_fast.py @@ -314,7 +314,7 @@ async def test_stream_no_player_still_generates_file(self, tmp_path, capsys): msp_cls.assert_called_once() assert msp_cls.call_args.kwargs.get('show_gui') is False err = capsys.readouterr().err - assert "no audio player found" in err.lower() + assert "media player" in err.lower() async def test_stream_does_not_raise_systemexit(self, tmp_path): """The old SystemExit(1) path must NOT be reached when VLC is missing.""" From 25bd7097d6d907ab8e70158f6df50fa3ce0ab00f Mon Sep 17 00:00:00 2001 From: David Chincharashvili Date: Wed, 3 Jun 2026 10:24:52 +0400 Subject: [PATCH 2/3] feat: MCP reliability fixes and a guided GUI Setup tab MCP server (mcp_server.py): - 
speak(blocking=True) now truly waits for the audio's measured duration (via ffprobe through pydub) before returning, so an agent can sequence speech without overlap. The return string echoes the resolved settings, and a failure to launch any player is reported instead of faked. - _LiveSession tracks synths_failed + last_error; both surface in session_status (and synths_failed in list_sessions) so an agent can detect a failed synthesis. buffer_preview widened 80 -> 400 chars. GUI (gui.py): - New "Setup" tab, shown first: step 1 runs the dependency doctor inline (with a Re-check button), step 2 is a voice picker with a Preview button, step 3 jumps to the Speak tab. Replaces the old JSON-editor- first experience for newcomers. Tests: added MCP cases (blocking wait, no-player error, settings echo, synth-failure fields) and a display-guarded GUI smoke test that verifies Setup is the first tab. 683 pass; readme MCP tool table updated. Co-Authored-By: Claude Opus 4.8 (1M context) --- readme.md | 6 +- src/TTS_ka/gui.py | 139 +++++++++++++++++++++++++++++++++++++++ src/TTS_ka/mcp_server.py | 70 ++++++++++++++++---- tests/test_gui_smoke.py | 31 +++++++++ tests/test_mcp_server.py | 73 ++++++++++++++++++++ 5 files changed, 304 insertions(+), 15 deletions(-) diff --git a/readme.md b/readme.md index 1d4e427..e003c99 100644 --- a/readme.md +++ b/readme.md @@ -111,16 +111,16 @@ Tools exposed: | Tool | Purpose | |------|--------| -| `speak(text, lang?, voice?)` | One-shot: synthesize and play immediately | +| `speak(text, lang?, voice?, blocking?)` | One-shot: synthesize and play. 
`blocking=True` waits for the audio's full duration before returning so the agent can sequence speech; the return string echoes the resolved settings | | `stream_open(lang?, voice?)` | Start a streaming session, returns `session_id` | | `stream_append(session_id, text)` | Push text; speaks each complete sentence | | `stream_close(session_id)` | Drain remaining buffer, end the session | -| `session_status(session_id)` | Inspect progress: total, pending synths, buffer preview | +| `session_status(session_id)` | Inspect progress: total, pending synths, `synths_failed` + `last_error`, buffer preview | | `list_sessions()` | All active session IDs | | `stop()` | Abort all playback and tear down sessions | | `list_voices(lang?)` | Voice catalog as JSON | -Why streaming over single `speak` calls: the LLM can push tokens as it generates them. Each completed sentence is synthesized immediately, so the user hears audio with sub-second latency from the LLM's first word. `session_status` reports `synths_pending` so the agent knows when the queue is backed up. +Why streaming over single `speak` calls: the LLM can push tokens as it generates them. Each completed sentence is synthesized immediately, so the user hears audio with sub-second latency from the LLM's first word. `session_status` reports `synths_pending` (queue backed up) and `synths_failed` / `last_error` (a synthesis failed) so the agent can react. 
### `--json`: machine-readable progress diff --git a/src/TTS_ka/gui.py b/src/TTS_ka/gui.py index 5345f07..7369944 100644 --- a/src/TTS_ka/gui.py +++ b/src/TTS_ka/gui.py @@ -302,12 +302,17 @@ def __init__( outer.pack(fill=tk.BOTH, expand=True) nb = ttk.Notebook(outer) nb.pack(fill=tk.BOTH, expand=True) + self._nb = nb + tab_setup = ttk.Frame(nb, padding=8) tab_speak = ttk.Frame(nb, padding=8) tab_cfg = ttk.Frame(nb, padding=8) + self._tab_speak = tab_speak + nb.add(tab_setup, text="Setup") nb.add(tab_speak, text="Speak") nb.add(tab_cfg, text="Config") + self._build_setup_tab(tab_setup) self._build_speak_tab(tab_speak) self._build_config_tab(tab_cfg) @@ -328,6 +333,140 @@ def __init__( self._worker: threading.Thread | None = None + def _build_setup_tab(self, frm: Any) -> None: + """Guided first screen: verify setup, pick + preview a voice, then speak.""" + import tkinter as tk + from tkinter import ttk, scrolledtext + + from . import voices as _voices + + frm.columnconfigure(0, weight=1) + + ttk.Label(frm, text="Welcome to TTS_ka").grid(row=0, column=0, sticky=tk.W) + ttk.Label( + frm, + text="Three quick steps to get going.", + wraplength=520, + justify=tk.LEFT, + ).grid(row=1, column=0, sticky=tk.W, pady=(2, 10)) + + # --- Step 1: verify setup ------------------------------------------- + ttk.Label(frm, text="1. Verify your setup").grid(row=2, column=0, sticky=tk.W) + self._setup_report = scrolledtext.ScrolledText(frm, height=9, wrap=tk.WORD) + self._setup_report.grid(row=3, column=0, sticky=(tk.E, tk.W), pady=(4, 0)) + self._setup_report.insert("1.0", "Checking dependencies…") + self._setup_report.configure(state=tk.DISABLED) + ttk.Button(frm, text="Re-check", command=self._on_setup_recheck).grid( + row=4, column=0, sticky=tk.W, pady=(4, 0) + ) + + ttk.Separator(frm, orient=tk.HORIZONTAL).grid( + row=5, column=0, sticky=(tk.E, tk.W), pady=12 + ) + + # --- Step 2: pick + preview a voice --------------------------------- + ttk.Label(frm, text="2. 
Pick a voice and preview it").grid(row=6, column=0, sticky=tk.W) + self._setup_voices = _voices.all_voices() + names = [f"{v.display_name} — {v.locale} ({v.gender})" for v in self._setup_voices] + row7 = ttk.Frame(frm) + row7.grid(row=7, column=0, sticky=tk.W, pady=(4, 0)) + self.setup_voice_var = tk.StringVar(value=names[0] if names else "") + self._setup_voice_combo = ttk.Combobox( + row7, values=names, textvariable=self.setup_voice_var, + state="readonly", width=40, + ) + self._setup_voice_combo.pack(side=tk.LEFT) + self._setup_preview_btn = ttk.Button( + row7, text="Preview voice", command=self._on_setup_preview + ) + self._setup_preview_btn.pack(side=tk.LEFT, padx=(8, 0)) + self.setup_status = tk.StringVar(value="") + ttk.Label(frm, textvariable=self.setup_status, wraplength=520, justify=tk.LEFT).grid( + row=8, column=0, sticky=tk.W, pady=(4, 0) + ) + + ttk.Separator(frm, orient=tk.HORIZONTAL).grid( + row=9, column=0, sticky=(tk.E, tk.W), pady=12 + ) + + # --- Step 3: start speaking ----------------------------------------- + ttk.Label(frm, text="3. Start speaking").grid(row=10, column=0, sticky=tk.W) + ttk.Button( + frm, text="Go to Speak tab →", + command=lambda: self._nb.select(self._tab_speak), + ).grid(row=11, column=0, sticky=tk.W, pady=(4, 0)) + + # Kick off the dependency check in the background so the UI stays responsive. 
+ self._run_setup_check_async() + + def _run_setup_check_async(self) -> None: + from .deps import collect_dep_rows, format_dep_report + + def work() -> None: + try: + report = format_dep_report(collect_dep_rows()) + except Exception as exc: # noqa: BLE001 + report = f"Could not run the dependency check: {exc}" + + def show() -> None: + self._setup_report.configure(state=self._tk.NORMAL) + self._setup_report.delete("1.0", "end") + self._setup_report.insert("1.0", report) + self._setup_report.configure(state=self._tk.DISABLED) + + self.root.after(0, show) + + threading.Thread(target=work, daemon=True).start() + + def _on_setup_recheck(self) -> None: + self._setup_report.configure(state=self._tk.NORMAL) + self._setup_report.delete("1.0", "end") + self._setup_report.insert("1.0", "Checking dependencies…") + self._setup_report.configure(state=self._tk.DISABLED) + self._run_setup_check_async() + + def _on_setup_preview(self) -> None: + from . import voices as _voices + + names = [f"{v.display_name} — {v.locale} ({v.gender})" for v in self._setup_voices] + sel = self.setup_voice_var.get() + if sel not in names: + self.setup_status.set("Pick a voice first.") + return + voice = self._setup_voices[names.index(sel)] + phrase = _voices.PREVIEW_PHRASE.get(voice.lang, _voices.PREVIEW_PHRASE["en"]) + + self._setup_preview_btn.configure(state=self._tk.DISABLED) + self.setup_status.set(f"Generating preview for {voice.display_name}…") + + def work() -> None: + import tempfile + + err: str | None = None + tmp_path = "" + try: + with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp: + tmp_path = tmp.name + asyncio.run( + fast_generate_audio(phrase, voice.lang, tmp_path, voice=voice.id) + ) + played = play_audio(tmp_path) + if not played: + err = f"Saved {tmp_path}, but no player opened it." 
+ except BaseException as exc: # noqa: BLE001 + err = str(exc) + + def done() -> None: + self._setup_preview_btn.configure(state=self._tk.NORMAL) + if err: + self.setup_status.set(err[:300]) + else: + self.setup_status.set(f"Played a sample of {voice.display_name}.") + + self.root.after(0, done) + + threading.Thread(target=work, daemon=True).start() + def _build_speak_tab(self, frm: Any) -> None: import tkinter as tk from tkinter import ttk, scrolledtext diff --git a/src/TTS_ka/mcp_server.py b/src/TTS_ka/mcp_server.py index 8ad3f06..9c393aa 100644 --- a/src/TTS_ka/mcp_server.py +++ b/src/TTS_ka/mcp_server.py @@ -39,6 +39,32 @@ MAX_CONCURRENT_PER_SESSION = 4 +def _audio_duration_seconds(path: str) -> Optional[float]: + """Best-effort audio duration in seconds (via ffprobe through pydub). + + Used to make ``speak(blocking=True)`` wait for playback to finish. + Returns ``None`` when the duration can't be determined. + """ + try: + from pydub.utils import mediainfo + info = mediainfo(path) + raw = info.get("duration") + return float(raw) if raw else None + except Exception: # noqa: BLE001 - any failure → unknown duration + return None + + +def _describe_settings(lang: str, voice: Optional[str], + prosody: Optional[ProsodyOpts]) -> str: + """Short human-readable echo of the resolved synthesis settings.""" + parts = [f"lang={lang}", f"voice={voice or 'default'}"] + if prosody is not None: + parts.append(f"rate={prosody.rate}") + parts.append(f"pitch={prosody.pitch}") + parts.append(f"volume={prosody.volume}") + return ", ".join(parts) + + class _LiveSession: """Long-lived sentence buffer + streaming player for one MCP stream.""" @@ -53,6 +79,8 @@ def __init__(self, lang: str, voice: Optional[str], self.tmp_dir = tempfile.mkdtemp(prefix="ttska-mcp-") self._idx = 0 # output-file counter (incremented inside _speak) self._queued = 0 # sentences ever scheduled — visible in status + self._failed = 0 # synth tasks that raised — visible in status + self._last_error: 
Optional[str] = None self._sem = asyncio.Semaphore(MAX_CONCURRENT_PER_SESSION) self._tasks: List[asyncio.Task] = [] self._closed = False @@ -83,8 +111,10 @@ def status(self) -> Dict[str, object]: "closed": self._closed, "total_sentences": self._queued, "synths_pending": self.synths_pending(), + "synths_failed": self._failed, + "last_error": self._last_error, "buffer_chars": len(preview), - "buffer_preview": preview[:80], + "buffer_preview": preview[:400], } async def _speak(self, sentence: str) -> None: @@ -100,6 +130,8 @@ async def _speak(self, sentence: str) -> None: voice=self.voice, prosody=self.prosody) self.player.add_chunk(path, chunk_index=idx) except Exception as exc: # noqa: BLE001 + self._failed += 1 + self._last_error = f"{type(exc).__name__}: {exc}" print(f"⚠️ synth failed (idx={idx}): {exc}", file=sys.stderr) async def close(self) -> int: @@ -206,7 +238,12 @@ async def speak(text: str, lang: str = "en", pitch: Optional[str] = None, volume: Optional[str] = None, blocking: bool = False) -> str: - """Speak *text* immediately. Returns when synth starts (or finishes if blocking). + """Speak *text* immediately. + + With ``blocking=False`` (default) the call returns as soon as + playback is launched. With ``blocking=True`` it waits for the audio's + full duration before returning, so an agent can sequence speech + without overlapping. The return string echoes the resolved settings. rate / pitch / volume are signed percentages or Hz (rate '+30%', pitch '+5Hz' or '-10%', volume '-25%'). 
Unspecified values fall @@ -216,6 +253,7 @@ async def speak(text: str, lang: str = "en", if not cleaned.strip(): return "skipped: empty" prosody = _resolve_prosody(rate, pitch, volume, default_prosody) + echo = _describe_settings(lang, voice, prosody) tmp_dir = tempfile.mkdtemp(prefix="ttska-mcp-one-") path = os.path.join(tmp_dir, "out.mp3") try: @@ -224,14 +262,19 @@ async def speak(text: str, lang: str = "en", except Exception as exc: # noqa: BLE001 shutil.rmtree(tmp_dir, ignore_errors=True) return f"error: {exc}" + played = play_audio(path) + if not played: + return f"error: no audio player available; saved {path} [{echo}]" if blocking: - # play_audio is non-blocking on most platforms; for blocking we'd - # need a sync waiter. Honour the parameter as a documentation hint - # and warn if true — current impl is fire-and-forget. - play_audio(path) - return f"played {path}" - play_audio(path) - return f"queued {path}" + # Truly wait for playback: sleep for the audio's measured duration + # so the agent can sequence speech instead of racing ahead. + dur = await asyncio.to_thread(_audio_duration_seconds, path) + if dur: + await asyncio.sleep(dur + 0.3) + shutil.rmtree(tmp_dir, ignore_errors=True) + played_for = f" in {dur:.1f}s" if dur else "" + return f"played{played_for} [{echo}]" + return f"queued {path} [{echo}]" @server.tool() async def stream_open(lang: str = "en", @@ -277,9 +320,11 @@ async def stream_close(session_id: str) -> str: async def session_status(session_id: str) -> Dict[str, object]: """Return progress info for a streaming session. - Fields: lang, voice, closed, total_sentences, synths_pending, - buffer_chars, buffer_preview. Useful for an agent to decide whether - to keep streaming or wait for synths to drain. + Fields: lang, voice, rate, pitch, volume, closed, total_sentences, + synths_pending, synths_failed, last_error, buffer_chars, + buffer_preview. 
Useful for an agent to decide whether to keep + streaming or wait for synths to drain, and to detect failed synths + (synths_failed > 0 with the most recent message in last_error). """ sess = sessions.get(session_id) if sess is None: @@ -300,6 +345,7 @@ async def list_sessions() -> List[Dict[str, object]]: "closed": sess._closed, "total_sentences": sess._queued, "synths_pending": sess.synths_pending(), + "synths_failed": sess._failed, }) return out diff --git a/tests/test_gui_smoke.py b/tests/test_gui_smoke.py index af377c9..8ae1337 100644 --- a/tests/test_gui_smoke.py +++ b/tests/test_gui_smoke.py @@ -2,9 +2,40 @@ from __future__ import annotations +from unittest.mock import MagicMock, patch + +import pytest + from TTS_ka.gui import _gui_output_path def test_gui_output_path_name() -> None: p = _gui_output_path() assert p.endswith("tts_ka_gui_last.mp3") + + +def test_setup_tab_builds_with_voices() -> None: + """The Setup tab constructs and is the first tab. Skips without a display.""" + tk = pytest.importorskip("tkinter") + try: + probe = tk.Tk() + probe.destroy() + except tk.TclError: + pytest.skip("no display available") + + from TTS_ka import gui + + # Keep construction fast/deterministic: stub the background dep check and + # any platform hotkey manager. + with patch("TTS_ka.deps.collect_dep_rows", return_value=[]), \ + patch("TTS_ka.deps.format_dep_report", return_value="ok"), \ + patch("TTS_ka.native_hotkeys.NativeHotkeyManager", MagicMock()): + app = gui.TTSSpeakApp({"lang": "en"}) + try: + # Setup is the first tab and the voice list is populated. 
+ assert app._nb.tabs(), "notebook has tabs" + first_tab_text = app._nb.tab(app._nb.tabs()[0], "text") + assert first_tab_text == "Setup" + assert len(app._setup_voices) > 0 + finally: + app.root.destroy() diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 1635584..1ea2862 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -188,6 +188,58 @@ async def boom(*a, **kw): assert "error" in r assert "network down" in r + @pytest.mark.asyncio + async def test_speak_blocking_waits_for_duration(self): + """blocking=True sleeps for the measured duration and echoes settings.""" + import asyncio as _asyncio + server = build_server() + slept = {} + + async def fake(text, lang, output, *, voice=None, prosody=None): + with open(output, "wb") as f: + f.write(b"x") + + async def fake_sleep(secs): + slept["secs"] = secs + + with patch("TTS_ka.mcp_server.fast_generate_audio", side_effect=fake), \ + patch("TTS_ka.mcp_server.play_audio", return_value=True), \ + patch("TTS_ka.mcp_server._audio_duration_seconds", return_value=2.0), \ + patch.object(_asyncio, "sleep", side_effect=fake_sleep): + r = await _call(server, "speak", text="Hello", lang="en", blocking=True) + assert "played" in r + assert "lang=en" in r + assert slept["secs"] >= 2.0 + + @pytest.mark.asyncio + async def test_speak_no_player_reports_error(self): + """When no player launches, speak says so instead of pretending success.""" + server = build_server() + + async def fake(text, lang, output, *, voice=None, prosody=None): + with open(output, "wb") as f: + f.write(b"x") + + with patch("TTS_ka.mcp_server.fast_generate_audio", side_effect=fake), \ + patch("TTS_ka.mcp_server.play_audio", return_value=False): + r = await _call(server, "speak", text="Hi", lang="en") + assert "no audio player" in r + + @pytest.mark.asyncio + async def test_speak_echoes_settings_when_queued(self): + server = build_server() + + async def fake(text, lang, output, *, voice=None, prosody=None): + with 
open(output, "wb") as f: + f.write(b"x") + + with patch("TTS_ka.mcp_server.fast_generate_audio", side_effect=fake), \ + patch("TTS_ka.mcp_server.play_audio", return_value=True): + r = await _call(server, "speak", text="Hi", lang="en", + voice="en-US-JennyNeural") + assert "queued" in r + assert "voice=en-US-JennyNeural" in r + @pytest.mark.asyncio async def test_speak_propagates_voice(self): server = build_server() @@ -265,6 +317,27 @@ async def slow(text, lang, output, *, voice=None, prosody=None): gate.set() await _call(server, "stream_close", session_id=sid) + @pytest.mark.asyncio + async def test_status_failure_fields_present(self): + """synths_failed / last_error appear in the status snapshot.""" + sessions = {} + server = build_server(sessions=sessions) + + async def boom(text, lang, output, *, voice=None, prosody=None): + raise RuntimeError("network down") + + with patch("TTS_ka.mcp_server.fast_generate_audio", side_effect=boom): + sid = await _call(server, "stream_open", lang="en") + await _call(server, "stream_append", session_id=sid, text="One. ") + # Let the synth task run and fail. + import asyncio as _a + await _a.sleep(0) + await _a.gather(*sessions[sid]._tasks, return_exceptions=True) + snap = await _call(server, "session_status", session_id=sid) + assert snap["synths_failed"] == 1 + assert "network down" in (snap["last_error"] or "") + await _call(server, "stream_close", session_id=sid) + @pytest.mark.asyncio async def test_status_shows_buffer_preview(self): sessions = {} From 31f44e0daac874580810da6b3061e14130411ce0 Mon Sep 17 00:00:00 2001 From: David Chincharashvili Date: Wed, 3 Jun 2026 10:32:38 +0400 Subject: [PATCH 3/3] test: fix TestSemaphore event-loop flakiness on Python 3.9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Python 3.9 asyncio.Semaphore() binds to the current event loop at construction. 
pytest-asyncio can leave the main thread with no current (or a closed) loop after an async test, so the synchronous semaphore construction in TestSemaphore raised "There is no current event loop in thread 'MainThread'" — failing CI on the 3.9 matrix leg while 3.10/3.12 passed. Ensure a usable loop exists in setup_method. Production is unaffected: _generation_semaphore() is first called inside uvicorn's running loop. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/test_server.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_server.py b/tests/test_server.py index fad98b5..5533bb9 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -116,6 +116,22 @@ def test_serve_missing_fastapi_exits_2(self, capsys): class TestSemaphore: + def setup_method(self): + # On Python 3.9, asyncio.Semaphore() binds to the current event loop at + # construction time. pytest-asyncio can leave the main thread with no + # current (or a closed) loop after an async test, which makes the sync + # construction below raise "There is no current event loop". Ensure a + # usable loop exists. Production is unaffected: _generation_semaphore() + # is first called inside uvicorn's running loop. + import asyncio + + try: + loop = asyncio.get_event_loop() + if loop.is_closed(): + raise RuntimeError("closed") + except RuntimeError: + asyncio.set_event_loop(asyncio.new_event_loop()) + def test_semaphore_value_matches_max_workers(self): # Force a fresh allocation globals_in_server = vars(server)