Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -196,4 +196,7 @@ cython_debug/

**/.nuxt
**/.data
**./output
**./output

*.mp3
*.pcm
5 changes: 5 additions & 0 deletions config.yaml.full
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ tool:
web_scraper:
endpoint:
api_key: # `token`
# [optional] https://console.volcengine.com/speech/new/experience/tts
text_to_speech:
app_id: # `app_id`
api_key: # `app_secret`
speaker: # `speaker`
# [optional] https://open.larkoffice.com/app
lark:
endpoint: # `app_id`
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ database = [
"tos>=2.8.4", # For TOS storage and Viking DB
"mem0ai==0.1.118", # For mem0
]
speech = []
eval = [
"prometheus-client>=0.22.1", # For exporting data to Prometheus pushgateway
"deepeval>=3.2.6", # For DeepEval-based evaluation
Expand Down
113 changes: 113 additions & 0 deletions tests/auth/veauth/test_speech_veauth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from unittest.mock import patch, MagicMock
from veadk.auth.veauth.speech_veauth import get_speech_token


# Test cases


def test_get_speech_token_with_env_vars(monkeypatch):
    """Static AK/SK taken from the environment are passed to the ListAPIKeys call."""
    # Arrange: credentials live in the environment, so the vefaas IAM
    # fallback is never consulted and no session token is sent.
    monkeypatch.setenv("VOLCENGINE_ACCESS_KEY", "test_access_key")
    monkeypatch.setenv("VOLCENGINE_SECRET_KEY", "test_secret_key")

    api_key_listing = {"Result": {"APIKeys": [{"APIKey": "test_api_key"}]}}
    expected_kwargs = {
        "request_body": {
            "ProjectName": "default",
            "OnlyAvailable": True,
        },
        "header": {"X-Security-Token": ""},
        "action": "ListAPIKeys",
        "ak": "test_access_key",
        "sk": "test_secret_key",
        "service": "speech_saas_prod",
        "version": "2025-05-20",
        "region": "cn-beijing",
        "host": "open.volcengineapi.com",
    }

    # Act
    with patch(
        "veadk.auth.veauth.speech_veauth.ve_request",
        return_value=api_key_listing,
    ) as mock_ve_request:
        token = get_speech_token()

    # Assert: the mock keeps its call record after the patch is undone.
    assert token == "test_api_key"
    mock_ve_request.assert_called_once_with(**expected_kwargs)


def test_get_speech_token_with_vefaas_iam(monkeypatch):
    """Without env credentials, the vefaas IAM credential signs the request."""
    # Arrange: make sure no static credentials are present.
    for env_name in ("VOLCENGINE_ACCESS_KEY", "VOLCENGINE_SECRET_KEY"):
        monkeypatch.delenv(env_name, raising=False)

    iam_credential = MagicMock(
        access_key_id="vefaas_access_key",
        secret_access_key="vefaas_secret_key",
        session_token="vefaas_session_token",
    )
    api_key_listing = {"Result": {"APIKeys": [{"APIKey": "vefaas_api_key"}]}}
    expected_kwargs = {
        "request_body": {
            "ProjectName": "default",
            "OnlyAvailable": True,
        },
        "header": {"X-Security-Token": "vefaas_session_token"},
        "action": "ListAPIKeys",
        "ak": "vefaas_access_key",
        "sk": "vefaas_secret_key",
        "service": "speech_saas_prod",
        "version": "2025-05-20",
        "region": "cn-shanghai",
        "host": "open.volcengineapi.com",
    }

    # Act
    with (
        patch(
            "veadk.auth.veauth.speech_veauth.get_credential_from_vefaas_iam",
            return_value=iam_credential,
        ) as mock_get_cred,
        patch(
            "veadk.auth.veauth.speech_veauth.ve_request",
            return_value=api_key_listing,
        ) as mock_ve_request,
    ):
        token = get_speech_token(region="cn-shanghai")

    # Assert
    assert token == "vefaas_api_key"
    mock_get_cred.assert_called_once()
    mock_ve_request.assert_called_once_with(**expected_kwargs)


def test_get_speech_token_invalid_response():
    """Test that a response without the expected key list raises ValueError."""
    mock_response = {"Error": {"Message": "Invalid request"}}

    # Use MonkeyPatch.context() so the environment changes are rolled back
    # when the block exits. A bare pytest.MonkeyPatch() is never undone and
    # would leak the fake credentials into every test that runs afterwards.
    with pytest.MonkeyPatch.context() as monkeypatch:
        monkeypatch.setenv("VOLCENGINE_ACCESS_KEY", "test_access_key")
        monkeypatch.setenv("VOLCENGINE_SECRET_KEY", "test_secret_key")

        with patch("veadk.auth.veauth.speech_veauth.ve_request") as mock_ve_request:
            mock_ve_request.return_value = mock_response

            # Execute & Verify
            with pytest.raises(ValueError, match="Failed to get speech api key list"):
                get_speech_token()
133 changes: 133 additions & 0 deletions tests/tools/builtin_tools/test_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import queue
import json
import base64
import requests
from unittest import TestCase
from unittest.mock import patch, MagicMock
from google.adk.tools import ToolContext
from veadk.tools.builtin_tools.tts import (
text_to_speech,
handle_server_response,
save_output_to_file,
_audio_player_thread,
)


class TestTTS(TestCase):
    """Unit tests for the text-to-speech builtin tool and its helper functions."""

    @staticmethod
    def _fake_stream_lines():
        """Build the two JSON lines served by the mocked streaming response.

        The first line carries one base64-encoded audio chunk with code 0,
        the second line carries only the code 20000000.
        """
        encoded_chunk = base64.b64encode(b"audio_chunk").decode()
        return [
            json.dumps({"code": 0, "data": encoded_chunk}),
            json.dumps({"code": 20000000}),
        ]

    def setUp(self):
        # Tool context stub exposing only the user id on its invocation context.
        tool_context = MagicMock(spec=ToolContext)
        tool_context._invocation_context = MagicMock()
        tool_context._invocation_context.user_id = "test_user"
        self.mock_tool_context = tool_context

        # Provide the speech settings through the environment for each test.
        speech_env = {
            "TOOL_VESPEECH_APP_ID": "test_app_id",
            "TOOL_VESPEECH_API_KEY": "test_api_key",
            "TOOL_VESPEECH_SPEAKER": "test_speaker",
        }
        self.patcher_env = patch.dict("os.environ", speech_env)
        self.patcher_env.start()

    def tearDown(self):
        # Restore the environment that setUp patched.
        self.patcher_env.stop()

    @patch("requests.Session")
    def test_tts_success(self, mock_session):
        """A streamed response yields a dict holding the saved audio path."""
        http_response = MagicMock(headers={"X-Tt-Logid": "test_log_id"})
        http_response.iter_lines.return_value = self._fake_stream_lines()
        post_mock = mock_session.return_value.post
        post_mock.return_value = http_response

        outcome = text_to_speech("test text", self.mock_tool_context)

        self.assertIsInstance(outcome, dict)
        self.assertIn("saved_audio_path", outcome)
        post_mock.assert_called_once()
        http_response.close.assert_called_once()

    @patch("requests.Session")
    def test_tts_failure(self, mock_session):
        """A request exception is reported through the "error" entry of the result."""
        post_mock = mock_session.return_value.post
        post_mock.side_effect = requests.exceptions.RequestException("Test error")

        outcome = text_to_speech("test text", self.mock_tool_context)

        self.assertIsInstance(outcome, dict)
        self.assertIn("error", outcome)
        self.assertIn("Test error", outcome["error"])
        post_mock.assert_called_once()

    @patch("builtins.open")
    def test_handle_server_response_success(self, mock_open):
        """Handling a streamed response opens the target file once for binary writing."""
        http_response = MagicMock()
        http_response.iter_lines.return_value = self._fake_stream_lines()

        handle_server_response(http_response, "test.pcm")

        mock_open.assert_called_once_with("test.pcm", "wb")

    @patch("builtins.open")
    def test_save_output_to_file_success(self, mock_open):
        """Saving writes the exact payload to the file opened in binary mode."""
        file_handle = MagicMock()
        mock_open.return_value.__enter__.return_value = file_handle

        save_output_to_file(b"audio_data", "test.pcm")

        mock_open.assert_called_once_with("test.pcm", "wb")
        file_handle.write.assert_called_once_with(b"audio_data")

    @patch("time.sleep")
    def test_audio_player_thread(self, mock_sleep):
        """The player writes one queued chunk to the stream and marks it done."""
        # The queue hands out one chunk and then raises queue.Empty.
        audio_queue = MagicMock()
        audio_queue.get.side_effect = [b"audio_data", queue.Empty]
        output_stream = MagicMock()
        # The stop flag reads False on the first check and True on the second.
        stop_flag = MagicMock()
        stop_flag.is_set.side_effect = [False, True]

        _audio_player_thread(audio_queue, output_stream, stop_flag)

        output_stream.write.assert_called_once_with(b"audio_data")
        audio_queue.task_done.assert_called_once()
54 changes: 54 additions & 0 deletions veadk/auth/veauth/speech_veauth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from veadk.auth.veauth.utils import get_credential_from_vefaas_iam
from veadk.utils.logger import get_logger
from veadk.utils.volcengine_sign import ve_request

logger = get_logger(__name__)


def get_speech_token(region: str = "cn-beijing") -> str:
    """Fetch the first available speech API key of the "default" project.

    Credentials are read from the ``VOLCENGINE_ACCESS_KEY`` and
    ``VOLCENGINE_SECRET_KEY`` environment variables. When either one is
    missing or empty, they are obtained from
    ``get_credential_from_vefaas_iam()`` instead, and the session token of
    that credential is sent in the ``X-Security-Token`` header.

    Args:
        region: Region passed to the signed ``ListAPIKeys`` request.

    Returns:
        The ``APIKey`` value of the first entry in the returned key list.

    Raises:
        ValueError: If the response does not contain a non-empty
            ``Result.APIKeys`` list whose first entry has an ``APIKey``.
    """
    logger.info("Fetching speech token...")

    access_key = os.getenv("VOLCENGINE_ACCESS_KEY")
    secret_key = os.getenv("VOLCENGINE_SECRET_KEY")
    session_token = ""

    if not (access_key and secret_key):
        # try to get from vefaas iam
        cred = get_credential_from_vefaas_iam()
        access_key = cred.access_key_id
        secret_key = cred.secret_access_key
        session_token = cred.session_token

    res = ve_request(
        request_body={"ProjectName": "default", "OnlyAvailable": True},
        header={"X-Security-Token": session_token},
        action="ListAPIKeys",
        ak=access_key,
        sk=secret_key,
        service="speech_saas_prod",
        version="2025-05-20",
        region=region,
        host="open.volcengineapi.com",
    )

    try:
        first_api_key = res["Result"]["APIKeys"][0]["APIKey"]
    except (KeyError, IndexError, TypeError) as err:
        # KeyError: an expected field is absent (e.g. an error payload).
        # IndexError: the key list is present but empty.
        # TypeError: a level is null or otherwise not subscriptable.
        # All of them mean "no usable key", so report them uniformly.
        raise ValueError(f"Failed to get speech api key list: {res}") from err

    logger.info("Successfully fetching speech API Key.")
    return first_api_key
12 changes: 12 additions & 0 deletions veadk/configs/tool_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from veadk.auth.veauth.prompt_pilot_veauth import PromptPilotVeAuth
from veadk.auth.veauth.vesearch_veauth import VesearchVeAuth
from veadk.auth.veauth.speech_veauth import get_speech_token


class PromptPilotConfig(BaseModel):
Expand All @@ -38,5 +39,16 @@ def api_key(self) -> str:
return os.getenv("TOOL_VESEARCH_API_KEY") or VesearchVeAuth().token


class VeSpeechConfig(BaseSettings):
    """Settings for the VeSpeech tool, read with the ``TOOL_VESPEECH_`` env prefix."""

    model_config = SettingsConfigDict(env_prefix="TOOL_VESPEECH_")

    endpoint: int | str = ""

    @cached_property
    def api_key(self) -> str:
        """Return the speech API key, computed once per instance.

        A non-empty ``TOOL_VESPEECH_API_KEY`` environment variable wins;
        otherwise the key is fetched through ``get_speech_token()``.
        """
        env_api_key = os.getenv("TOOL_VESPEECH_API_KEY")
        if env_api_key:
            return env_api_key
        return get_speech_token()


class BuiltinToolConfigs(BaseModel):
    """Group the configuration objects of the built-in tools."""

    # VeSearch tool settings; a new VeSearchConfig is created by default.
    vesearch: VeSearchConfig = Field(default_factory=VeSearchConfig)
    # VeSpeech tool settings; a new VeSpeechConfig is created by default.
    vespeech: VeSpeechConfig = Field(default_factory=VeSpeechConfig)
Loading