Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ class SarvamTTSOptions:
base_url: API endpoint URL
ws_url: WebSocket endpoint URL
word_tokenizer: Tokenizer for processing text
max_session_duration: Maximum WebSocket connection reuse duration in seconds (0 = open a fresh connection for every request)
"""

target_language_code: SarvamTTSLanguages | str # BCP-47 for supported Indian languages
Expand All @@ -318,6 +319,7 @@ class SarvamTTSOptions:
word_tokenizer: tokenize.tokenizer.SentenceTokenizer | None = None
send_completion_event: bool = True
output_audio_codec: str = "mp3"
max_session_duration: float = 3600


class TTS(tts.TTS):
Expand Down Expand Up @@ -347,6 +349,10 @@ class TTS(tts.TTS):
ws_url: WebSocket endpoint URL
http_session: Optional aiohttp session to use
output_audio_codec: Optionally choose the output codec format (mp3)
max_session_duration: Maximum WebSocket connection reuse duration in seconds.
Connections are recycled after this period. Set to 0 for a fresh
connection per request, which may improve audio quality with some
providers. Defaults to 3600 (1 hour).
"""

def __init__(
Expand All @@ -373,6 +379,7 @@ def __init__(
http_session: aiohttp.ClientSession | None = None,
send_completion_event: bool = True,
output_audio_codec: str = "mp3",
max_session_duration: float = 3600,
) -> None:
super().__init__(
capabilities=tts.TTSCapabilities(streaming=True),
Expand Down Expand Up @@ -428,6 +435,8 @@ def __init__(
raise ValueError(
f"output_audio_codec must be one of {','.join(sorted(ALLOWED_OUTPUT_AUDIO_CODECS))}"
)
if max_session_duration < 0:
raise ValueError("max_session_duration must be >= 0")

# Validate model-speaker compatibility
if not validate_model_speaker_compatibility(model, speaker):
Expand Down Expand Up @@ -461,14 +470,15 @@ def __init__(
word_tokenizer=word_tokenizer,
send_completion_event=send_completion_event,
output_audio_codec=output_audio_codec,
max_session_duration=max_session_duration,
)
self._session = http_session
self._streams = weakref.WeakSet[SynthesizeStream]()

self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
connect_cb=self._connect_ws,
close_cb=self._close_ws,
max_session_duration=3600, # 1 hour
max_session_duration=self._opts.max_session_duration,
mark_refreshed_on_get=False,
)

Expand Down