1212
1313from typing import AsyncGenerator , Optional
1414
15+ import aiohttp
1516from loguru import logger
1617
1718from pipecat .frames .frames import (
2425from pipecat .services .tts_service import TTSService
2526from pipecat .utils .tracing .service_decorators import traced_tts
2627
27- try :
28- from deepgram import DeepgramClient , DeepgramClientOptions , SpeakOptions
29- except ModuleNotFoundError as e :
30- logger .error (f"Exception: { e } " )
31- logger .error ("In order to use Deepgram, you need to `pip install pipecat-ai[deepgram]`." )
32- raise Exception (f"Missing module: { e } " )
33-
3428
3529class DeepgramTTSService (TTSService ):
3630 """Deepgram text-to-speech service.
@@ -45,7 +39,8 @@ def __init__(
4539 * ,
4640 api_key : str ,
4741 voice : str = "aura-2-helena-en" ,
48- base_url : str = "" ,
42+ aiohttp_session : aiohttp .ClientSession ,
43+ base_url : str = "https://api.deepgram.com" ,
4944 sample_rate : Optional [int ] = None ,
5045 encoding : str = "linear16" ,
5146 ** kwargs ,
@@ -55,21 +50,22 @@ def __init__(
5550 Args:
5651 api_key: Deepgram API key for authentication.
5752 voice: Voice model to use for synthesis. Defaults to "aura-2-helena-en".
58- base_url: Custom base URL for Deepgram API. Uses default if empty.
53+ aiohttp_session: Shared aiohttp session for HTTP requests with connection pooling.
54+ base_url: Custom base URL for Deepgram API. Defaults to "https://api.deepgram.com".
5955 sample_rate: Audio sample rate in Hz. If None, uses service default.
6056 encoding: Audio encoding format. Defaults to "linear16".
6157 **kwargs: Additional arguments passed to parent TTSService class.
6258 """
6359 super ().__init__ (sample_rate = sample_rate , ** kwargs )
6460
61+ self ._api_key = api_key
62+ self ._session = aiohttp_session
63+ self ._base_url = base_url
6564 self ._settings = {
6665 "encoding" : encoding ,
6766 }
6867 self .set_voice (voice )
6968
70- client_options = DeepgramClientOptions (url = base_url )
71- self ._deepgram_client = DeepgramClient (api_key , config = client_options )
72-
7369 def can_generate_metrics (self ) -> bool :
7470 """Check if the service can generate metrics.
7571
@@ -90,27 +86,49 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
9086 """
9187 logger .debug (f"{ self } : Generating TTS [{ text } ]" )
9288
93- options = SpeakOptions (
94- model = self ._voice_id ,
95- encoding = self ._settings ["encoding" ],
96- sample_rate = self .sample_rate ,
97- container = "none" ,
98- )
89+ # Build the endpoint URL; query parameters are passed separately via `params`
90+ url = f"{ self ._base_url } /v1/speak"
9991
100- try :
101- await self .start_ttfb_metrics ()
92+ headers = {"Authorization" : f"Token { self ._api_key } " , "Content-Type" : "application/json" }
10293
103- response = await self ._deepgram_client .speak .asyncrest .v ("1" ).stream_raw (
104- {"text" : text }, options
105- )
94+ params = {
95+ "model" : self ._voice_id ,
96+ "encoding" : self ._settings ["encoding" ],
97+ "sample_rate" : self .sample_rate ,
98+ "container" : "none" ,
99+ }
106100
107- await self .start_tts_usage_metrics (text )
108- yield TTSStartedFrame ()
101+ payload = {
102+ "text" : text ,
103+ }
104+
105+ try :
106+ await self .start_ttfb_metrics ()
109107
110- async for data in response .aiter_bytes ():
111- await self .stop_ttfb_metrics ()
112- if data :
113- yield TTSAudioRawFrame (audio = data , sample_rate = self .sample_rate , num_channels = 1 )
108+ async with self ._session .post (
109+ url , headers = headers , json = payload , params = params
110+ ) as response :
111+ if response .status != 200 :
112+ error_text = await response .text ()
113+ raise Exception (f"HTTP { response .status } : { error_text } " )
114+
115+ await self .start_tts_usage_metrics (text )
116+ yield TTSStartedFrame ()
117+
118+ CHUNK_SIZE = self .chunk_size
119+
120+ first_chunk = True
121+ async for chunk in response .content .iter_chunked (CHUNK_SIZE ):
122+ if first_chunk :
123+ await self .stop_ttfb_metrics ()
124+ first_chunk = False
125+
126+ if chunk :
127+ yield TTSAudioRawFrame (
128+ audio = chunk ,
129+ sample_rate = self .sample_rate ,
130+ num_channels = 1 ,
131+ )
114132
115133 yield TTSStoppedFrame ()
116134
0 commit comments