Skip to content

Commit 3c3d0c0

Browse files
authored
Merge pull request #1597 from Capsize-Games/develop
auto detects language and responds in that language (tts and llm)
2 parents e0c70bb + ecf1a06 commit 3c3d0c0

File tree

16 files changed

+292
-67
lines changed

16 files changed

+292
-67
lines changed

setup.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
# Summarizations (basic)
4747
"sumy==0.11.0",
4848
"sentencepiece==0.2.0",
49+
"lingua-language-detector==2.1.0",
4950
],
5051
"agents": [
5152
"llama-index==0.12.36",
@@ -106,6 +107,8 @@
106107
],
107108
"openvoice_kr": [
108109
"jamo==0.4.1",
110+
"python-mecab-ko==1.3.7",
111+
"python-mecab-ko-dic==2.1.1.post2",
109112
],
110113
"openvoice_tw": [
111114
"g2pkk>=0.1.2",
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""add detected language columns to application settings
2+
3+
4+
Revision ID: 82e99b1a4ccc
5+
Revises: 72977a42e2a2
6+
Create Date: 2025-05-18 16:02:59.334823
7+
8+
"""
9+
10+
from typing import Sequence, Union
11+
12+
from airunner.data.models.application_settings import ApplicationSettings
13+
from airunner.utils.db import add_column, drop_column
14+
15+
16+
# revision identifiers, used by Alembic.
17+
revision: str = "82e99b1a4ccc"
18+
down_revision: Union[str, None] = "72977a42e2a2"
19+
branch_labels: Union[str, Sequence[str], None] = None
20+
depends_on: Union[str, Sequence[str], None] = None
21+
22+
23+
def upgrade() -> None:
    """Add the language-detection columns to the application settings table."""
    for column_name in ("detected_language", "use_detected_language"):
        add_column(ApplicationSettings, column_name)
26+
27+
28+
def downgrade() -> None:
    """Drop the language-detection columns from the application settings table."""
    for column_name in ("detected_language", "use_detected_language"):
        drop_column(ApplicationSettings, column_name)

src/airunner/api.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,13 @@ def __init__(self, *args, **kwargs):
862862
"tohoku-nlp/bert-base-japanese-v3",
863863
)
864864
),
865+
"hfl/chinese-roberta-wwm-ext-large": os.path.expanduser(
866+
os.path.join(
867+
self.path_settings.base_path,
868+
"text/models/tts",
869+
"hfl/chinese-roberta-wwm-ext-large",
870+
)
871+
),
865872
}
866873
self._initialized = True
867874
self.llm = LLMAPIService(emit_signal=self.emit_signal)

src/airunner/data/bootstrap/openvoice_bootstrap_data.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,4 +93,15 @@
9393
"vocab.txt",
9494
]
9595
},
96+
"hfl/chinese-roberta-wwm-ext-large": {
97+
"files": [
98+
"added_tokens.json",
99+
"config.json",
100+
"pytorch_model.bin",
101+
"special_tokens_map.json",
102+
"tokenizer.json",
103+
"tokenizer_config.json",
104+
"vocab.txt",
105+
]
106+
},
96107
}

src/airunner/data/models/application_settings.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
GeneratorSection,
77
CanvasToolName,
88
Mode,
9+
AvailableLanguage,
910
)
1011

1112

@@ -64,3 +65,5 @@ class ApplicationSettings(BaseModel):
6465
nodegraph_zoom = Column(Integer, default=0)
6566
nodegraph_center_x = Column(Integer, default=0)
6667
nodegraph_center_y = Column(Integer, default=0)
68+
detected_language = Column(String, default=AvailableLanguage.EN.value)
69+
use_detected_language = Column(Boolean, default=True)

src/airunner/gui/widgets/llm/chat_prompt_widget.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from PySide6.QtCore import Slot, QPropertyAnimation, QTimer, Qt
66
from PySide6.QtWidgets import QSpacerItem, QSizePolicy, QApplication
77

8+
from airunner.data.models.application_settings import ApplicationSettings
89
from airunner.enums import (
910
SignalCode,
1011
LLMActionType,
@@ -15,13 +16,13 @@
1516
from airunner.gui.widgets.llm.templates.chat_prompt_ui import Ui_chat_prompt
1617
from airunner.gui.widgets.llm.message_widget import MessageWidget
1718
from airunner.data.models import Conversation
19+
from airunner.utils.llm.language import detect_language
1820
from airunner.utils.llm.strip_names_from_message import (
1921
strip_names_from_message,
2022
)
2123
from airunner.utils import create_worker
2224
from airunner.utils.widgets import load_splitter_settings
2325
from airunner.handlers.llm.llm_request import LLMRequest
24-
from airunner.handlers.llm.llm_response import LLMResponse
2526
from airunner.workers.llm_response_worker import LLMResponseWorker
2627
from airunner.settings import AIRUNNER_ART_ENABLED
2728

@@ -342,6 +343,12 @@ def do_generate(self, prompt_override=None):
342343
self.logger.warning("Prompt is empty")
343344
return
344345

346+
language = detect_language(prompt)
347+
ApplicationSettings.objects.update(
348+
self.application_settings.id,
349+
detected_language=language,
350+
)
351+
345352
if self.generating:
346353
if self.held_message is None:
347354
self.held_message = prompt

src/airunner/gui/widgets/stablediffusion/stablediffusion_generator_form.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def __init__(self, *args, **kwargs):
156156
self.generator_settings.quality_effects
157157
)
158158
self.ui.infinite_images_button.setChecked(
159-
self.generator_settings.generate_infinite_images
159+
self.generator_settings.generate_infinite_images if self.generator_settings.generate_infinite_images is not None else False
160160
)
161161
self.ui.quality_effects.blockSignals(False)
162162
self.ui.infinite_images_button.blockSignals(False)

src/airunner/handlers/llm/agent/agents/base.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,41 @@ def __init__(
119119
)
120120
super().__init__(*args, **kwargs)
121121

122+
@property
def language(self) -> str:
    """
    Return the full English name of the language the agent should reply in.

    Resolution order:
      1. An explicit override stored in ``self._language`` (see the setter).
      2. ``application_settings.detected_language`` when
         ``application_settings.use_detected_language`` is truthy.
      3. ``"EN"``.

    Unknown or missing codes resolve to ``"English"``.
    """
    # Both the ISO 639-1 codes ("KO", "JA") and the codes produced by
    # detect_language via AvailableLanguage.KR / AvailableLanguage.JP are
    # accepted (presumably "KR" / "JP" - confirm against the enum), so
    # Korean and Japanese input is not silently mapped to English.
    language_names = {
        "FR": "French",
        "DE": "German",
        "ES": "Spanish",
        "KO": "Korean",
        "KR": "Korean",
        "RU": "Russian",
        "ZH": "Chinese",
        "JA": "Japanese",
        "JP": "Japanese",
    }

    # The override lives in a private attribute so the property does not
    # recurse into itself.
    lang = getattr(self, "_language", None)
    if lang is None:
        settings = getattr(self, "application_settings", None)
        if getattr(settings, "use_detected_language", False):
            lang = getattr(settings, "detected_language", "EN")
        else:
            lang = "EN"

    # Guard the lookup: a non-string (e.g. None from the database) must
    # fall back to English rather than raise.
    if not isinstance(lang, str):
        return "English"
    return language_names.get(lang, "English")

@language.setter
def language(self, value: str):
    """Set an explicit language code that takes priority over detection."""
    self._language = value
156+
122157
@property
123158
def use_memory(self) -> bool:
124159
use_memory = self._use_memory
@@ -1027,6 +1062,7 @@ def system_prompt(self) -> str:
10271062
)
10281063
else:
10291064
conversation_timestamp_prompt = ""
1065+
10301066
prompt = (
10311067
f"Your name is {self.botname}.\n"
10321068
f"- The user ({self.username}) is having a conversation with the assistant ({self.botname}).\n"
@@ -1066,7 +1102,12 @@ def system_prompt(self) -> str:
10661102
f"The conversation is between user ({self.username}) and assistant ({self.botname}).\n"
10671103
f"{conversation_timestamp_prompt}"
10681104
f"{section_prompt}"
1105+
f"------\n"
10691106
)
1107+
1108+
if self.language:
1109+
prompt += "Response in " + self.language + "\n"
1110+
10701111
prompt = prompt.replace("{{ username }}", self.username)
10711112
prompt = prompt.replace("{{ botname }}", self.botname)
10721113
prompt = prompt.replace("{{ speaker_name }}", self.username)

src/airunner/handlers/tts/openvoice_model_manager.py

Lines changed: 82 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import torch
55
from airunner.settings import AIRUNNER_BASE_PATH
6+
from airunner.utils.llm.language import detect_language
67

78
torch.hub.set_dir(
89
os.environ.get("TORCH_HOME", os.path.join(AIRUNNER_BASE_PATH, "torch/hub"))
@@ -127,10 +128,28 @@ def __init__(self, *args, **kwargs):
127128
self.model: Optional[TTS] = None
128129
self.src_path: str = f"{self._output_dir}/tmp.wav"
129130
self._speed: float = 1.0
130-
self._language: AvailableLanguage = AvailableLanguage(
131-
self.openvoice_settings.language
132-
)
133131
self._reference_speaker = speaker_recording_path
132+
self._language: AvailableLanguage = (
133+
AvailableLanguage.EN_NEWEST
134+
) # Use a private attribute
135+
136+
@property
def language(self) -> AvailableLanguage:
    """
    Get the language setting for TTS.

    When ``application_settings.use_detected_language`` is truthy, the
    stored ``detected_language`` is resolved to an ``AvailableLanguage``
    member; otherwise the privately stored language is returned.

    The stored string is matched first as a member name (the original
    behavior) and then as a member value, because the code that writes
    ``detected_language`` stores enum ``.value`` strings. If neither
    matches (including a ``None`` value) the privately stored language
    is used instead of raising.
    """
    settings = getattr(self, "application_settings", None)
    if not getattr(settings, "use_detected_language", False):
        return self._language

    detected = getattr(settings, "detected_language", None)
    try:
        return AvailableLanguage[detected]
    except KeyError:
        pass
    try:
        return AvailableLanguage(detected)
    except ValueError:
        # Unknown or missing code: fall back rather than break TTS.
        return self._language

@language.setter
def language(self, value: AvailableLanguage):
    """Store the language used when detection is disabled or unusable."""
    self._language = value
134153

135154
@property
136155
def device(self):
@@ -156,42 +175,77 @@ def tone_color_converter(self) -> StreamingToneColorConverter:
156175
)
157176
return self._tone_color_converter
158177

159-
def generate(self, tts_request: Type[TTSRequest]):
178+
_source_se: Optional[torch.Tensor] = None
179+
180+
@property
def speaker_key(self) -> str:
    """
    Get the speaker key for the TTS model.

    English maps to the fixed key ``"en-newest"``; every other language
    uses its lower-cased enum value.
    """
    return (
        "en-newest"
        if self.language is AvailableLanguage.EN
        else self.language.value.lower()
    )
188+
189+
@property
def speaker_id(self) -> str:
    """
    Return the key used to look up the speaker in the model's speaker table.

    English maps to ``"EN-Default"``; every other language uses its enum
    value unchanged.
    """
    # English speaker keys seen on the model:
    # ['EN-US', 'EN-BR', 'EN_INDIA', 'EN-AU', 'EN-Default']
    return (
        "EN-Default"
        if self.language is AvailableLanguage.EN
        else self.language.value
    )
168195

169-
source_se = torch.load(
196+
@property
def source_se(self) -> torch.Tensor:
    """
    Return the base-speaker checkpoint for the current speaker key.

    The checkpoint is loaded with ``torch.load`` on first access and kept
    in ``self._source_se``; later accesses return the stored object until
    that attribute is reset to ``None``.
    """
    if self._source_se is not None:
        return self._source_se

    checkpoint_path = os.path.join(
        self.path_settings.tts_model_path,
        f"openvoice/checkpoints_v2/base_speakers/ses/{self.speaker_key}.pth",
    )
    self._source_se = torch.load(checkpoint_path, map_location=self.device)
    return self._source_se
176207

177-
self.model.tts_to_file(
178-
message, speaker_id, self.src_path, speed=self._speed
179-
)
208+
def generate(self, tts_request: Type[TTSRequest]):
    """
    Generate speech using OpenVoice and apply tone color conversion.

    The language of the request message is detected first. If it differs
    from the current language, the stored source speaker checkpoint is
    dropped so it is reloaded for the new speaker key. The message is
    then synthesized to ``self.src_path``, converted with the tone color
    converter, and any non-``None`` result is added to the TTS stream.

    :param tts_request: request whose ``message`` attribute holds the
        text to speak.
    """
    message = tts_request.message
    language = AvailableLanguage(detect_language(message))
    if self.language != language:
        # The stored checkpoint belongs to the previous speaker key.
        self._source_se = None
    self.language = language
    # Re-read the property: it may resolve from application settings
    # rather than from the value just assigned.
    self.model.language = self.language
    speaker_ids = self.model.hps.data.spk2id

    self.model.tts_to_file(
        message,
        speaker_ids[self.speaker_id],
        self.src_path,
        speed=self._speed,
    )

    output_path = os.path.join(
        self.path_settings.tts_model_path,
        f"openvoice/{self._output_dir}/output_v2_{self.speaker_key}.wav",
    )

    response = self.tone_color_converter.convert(
        audio_src_path=self.src_path,
        src_se=self.source_se,
        tgt_se=self._target_se,
        output_path=output_path,
    )

    if response is not None:
        self.api.tts.add_to_stream(response)
195249

196250
def load(self, _target_model=None):
197251
"""
@@ -201,7 +255,7 @@ def load(self, _target_model=None):
201255
self.unload()
202256
self.change_model_status(ModelType.TTS, ModelStatus.LOADING)
203257
self._initialize()
204-
self.model = TTS(language=self._language)
258+
self.model = TTS(language=self.language)
205259
self.change_model_status(ModelType.TTS, ModelStatus.LOADED)
206260

207261
def unload(self):

src/airunner/utils/llm/language.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from lingua import Language, LanguageDetectorBuilder
2+
from airunner.enums import AvailableLanguage
3+
4+
5+
# Lazily built lingua detector, shared by every detect_language call so the
# detector is not rebuilt for each message.
_detector = None


def _get_detector():
    """Build the lingua detector on first use and return the shared instance."""
    global _detector
    if _detector is None:
        _detector = LanguageDetectorBuilder.from_languages(
            Language.ENGLISH,
            Language.FRENCH,
            Language.GERMAN,
            Language.SPANISH,
            Language.KOREAN,
            Language.RUSSIAN,
            Language.CHINESE,
            Language.JAPANESE,
        ).build()
    return _detector


def detect_language(txt: str) -> str:
    """
    Detect the language of ``txt`` and return an ``AvailableLanguage`` value.

    Japanese and Korean are mapped explicitly because their ISO 639-1
    codes ("JA", "KO") differ from the enum members used here (``JP``,
    ``KR``). Any other code is looked up in ``AvailableLanguage`` by value.

    :param txt: text to analyze.
    :return: the matching ``AvailableLanguage`` value, or
        ``AvailableLanguage.EN.value`` when the text is empty, no language
        can be determined, or the detected language is not in the enum.
    """
    fallback = AvailableLanguage.EN.value
    if not txt or not txt.strip():
        return fallback

    language = _get_detector().detect_language_of(txt)
    if language is None:
        # No reliable detection for this text.
        return fallback

    name = language.iso_code_639_1.name
    if name == "JA":
        return AvailableLanguage.JP.value
    if name == "KO":
        return AvailableLanguage.KR.value
    try:
        return AvailableLanguage(name).value
    except ValueError:
        # Enum lookup by value raises ValueError (not KeyError) for an
        # unknown value.
        print(f"Language {name} not found in AvailableLanguage enum.")
        return fallback

0 commit comments

Comments
 (0)