33import os
44import torch
55from airunner .settings import AIRUNNER_BASE_PATH
6+ from airunner .utils .llm .language import detect_language
67
78torch .hub .set_dir (
89 os .environ .get ("TORCH_HOME" , os .path .join (AIRUNNER_BASE_PATH , "torch/hub" ))
@@ -127,10 +128,28 @@ def __init__(self, *args, **kwargs):
127128 self .model : Optional [TTS ] = None
128129 self .src_path : str = f"{ self ._output_dir } /tmp.wav"
129130 self ._speed : float = 1.0
130- self ._language : AvailableLanguage = AvailableLanguage (
131- self .openvoice_settings .language
132- )
133131 self ._reference_speaker = speaker_recording_path
132+ self ._language : AvailableLanguage = (
133+ AvailableLanguage .EN_NEWEST
134+ ) # Use a private attribute
135+
@property
def language(self) -> AvailableLanguage:
    """
    Get the language setting for TTS.

    When the application settings have ``use_detected_language`` enabled,
    the member of ``AvailableLanguage`` named by
    ``application_settings.detected_language`` is returned. In every other
    case, including a detected value that is missing or is not a member
    name, the privately stored language is returned.
    """
    settings = getattr(self, "application_settings", None)
    if getattr(settings, "use_detected_language", False):
        detected = getattr(settings, "detected_language", None)
        try:
            return AvailableLanguage[detected]
        except KeyError:
            # Unknown or unset detected language: fall through to the
            # configured language instead of failing on property access.
            pass
    return self._language
149+
@language.setter
def language(self, value: AvailableLanguage):
    """
    Set the stored TTS language.

    The cached source speaker embedding is keyed by language, so it is
    cleared whenever the stored language actually changes; it is then
    reloaded on the next access.
    """
    if getattr(self, "_language", None) != value:
        # Drop the embedding that was loaded for the previous language.
        self._source_se = None
    self._language = value
134153
135154 @property
136155 def device (self ):
@@ -156,42 +175,77 @@ def tone_color_converter(self) -> StreamingToneColorConverter:
156175 )
157176 return self ._tone_color_converter
158177
159- def generate (self , tts_request : Type [TTSRequest ]):
# Cached source speaker embedding. It stays None until first loaded and is
# reset to None when the language changes so that it is loaded again.
_source_se: Optional[torch.Tensor] = None
179+
@property
def speaker_key(self) -> str:
    """
    Get the speaker key for the TTS model.

    The key is the lowercase, hyphen-separated form of the active
    language's value and is used to build the base-speaker checkpoint
    filename and the converted output filename. ``AvailableLanguage.EN``
    is mapped to ``"en-newest"``.
    """
    if self.language is AvailableLanguage.EN:
        return "en-newest"
    # Keys are hyphenated on disk; normalise any underscore in the enum
    # value so it resolves to the same spelling as the literal above.
    return self.language.value.lower().replace("_", "-")
188+
@property
def speaker_id(self) -> str:
    """
    Get the speaker name used to index the model's ``spk2id`` mapping.

    ``AvailableLanguage.EN`` resolves to ``"EN-Default"``; any other
    language resolves to its enum value.
    """
    # English speaker names seen in the mapping:
    # ['EN-US', 'EN-BR', 'EN_INDIA', 'EN-AU', 'EN-Default']
    active = self.language
    return "EN-Default" if active is AvailableLanguage.EN else active.value
168195
169- source_se = torch .load (
@property
def source_se(self) -> torch.Tensor:
    """
    Get the source speaker embedding for the current speaker key.

    The embedding is read with ``torch.load`` from the base-speakers
    checkpoint directory on first access and cached in ``_source_se``;
    later accesses return the cached object until the cache is cleared.
    """
    cached = self._source_se
    if cached is not None:
        return cached

    checkpoint_path = os.path.join(
        self.path_settings.tts_model_path,
        f"openvoice/checkpoints_v2/base_speakers/ses/{self.speaker_key}.pth",
    )
    self._source_se = torch.load(checkpoint_path, map_location=self.device)
    return self._source_se
176207
177- self .model .tts_to_file (
178- message , speaker_id , self .src_path , speed = self ._speed
179- )
def generate(self, tts_request: Type[TTSRequest]):
    """
    Generate speech using OpenVoice and apply tone color conversion.

    The language of the request message is detected first. If it differs
    from the current language, the cached source speaker embedding is
    cleared and both this object and the loaded model are switched to the
    detected language. The message is then synthesised to ``src_path``,
    passed through the tone color converter, and any non-``None`` result
    is added to the TTS stream.

    :param tts_request: request object whose ``message`` is the text to speak.
    """
    message = tts_request.message

    try:
        detected = AvailableLanguage(detect_language(message))
    except ValueError:
        # The detector reported something that is not an AvailableLanguage
        # value; keep speaking in the current language rather than abort.
        detected = self.language

    if self.language != detected:
        # The cached embedding belongs to the previous language.
        self._source_se = None
        self.language = detected
        self.model.language = self.language

    speaker_ids = self.model.hps.data.spk2id
    self.model.tts_to_file(
        message,
        speaker_ids[self.speaker_id],
        self.src_path,
        speed=self._speed,
    )

    output_path = os.path.join(
        self.path_settings.tts_model_path,
        f"openvoice/{self._output_dir}/output_v2_{self.speaker_key}.wav",
    )

    response = self.tone_color_converter.convert(
        audio_src_path=self.src_path,
        src_se=self.source_se,
        tgt_se=self._target_se,
        output_path=output_path,
    )

    if response is not None:
        self.api.tts.add_to_stream(response)
195249
196250 def load (self , _target_model = None ):
197251 """
@@ -201,7 +255,7 @@ def load(self, _target_model=None):
201255 self .unload ()
202256 self .change_model_status (ModelType .TTS , ModelStatus .LOADING )
203257 self ._initialize ()
204- self .model = TTS (language = self ._language )
258+ self .model = TTS (language = self .language )
205259 self .change_model_status (ModelType .TTS , ModelStatus .LOADED )
206260
207261 def unload (self ):
0 commit comments