class Audio:
    """Helper class for returning audio from tools.

    Wraps either a file path or raw bytes and converts them into an MCP
    ``AudioContent`` block (base64 payload plus MIME type).

    Args:
        path: Location of an audio file. Mutually exclusive with ``data``.
        data: Raw audio bytes. Mutually exclusive with ``path``.
        format: Optional audio format name such as ``"wav"`` or ``"mp3"``.
            When given it takes precedence over the file extension.

    Raises:
        ValueError: If neither or both of ``path`` and ``data`` are given.
    """

    # Single source of truth for extension -> MIME type, shared by the
    # extension-based guess and the explicit ``format`` lookup so both routes
    # agree (e.g. mp3 is always reported as ``audio/mpeg``).
    _MIME_BY_SUFFIX: dict[str, str] = {
        ".wav": "audio/wav",
        ".mp3": "audio/mpeg",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".aac": "audio/aac",
        ".m4a": "audio/mp4",
    }

    def __init__(
        self,
        path: str | Path | None = None,
        data: bytes | None = None,
        format: str | None = None,  # shadows the builtin; kept for API compatibility
    ):
        if path is None and data is None:
            raise ValueError("Either path or data must be provided")
        if path is not None and data is not None:
            raise ValueError("Only one of path or data can be provided")

        self.path = Path(path) if path else None
        self.data = data
        self._format = format
        # Resolved once at construction; to_audio_content() reuses this value.
        self._mime_type = self._get_mime_type()

    def _get_mime_type(self) -> str:
        """Get MIME type from format or guess from file extension.

        Returns:
            The MIME type for an explicit ``format`` (known formats map to the
            same value as their file extension, anything else becomes
            ``audio/<format>``); otherwise the type for the path's suffix, or
            ``application/octet-stream`` for an unknown suffix; otherwise
            ``audio/wav`` when only raw bytes were supplied.
        """
        if self._format:
            # Tolerate "MP3", " wav " and ".ogg" spellings of the format name.
            subtype = self._format.strip().lower().lstrip(".")
            if subtype:
                return self._MIME_BY_SUFFIX.get(f".{subtype}", f"audio/{subtype}")

        if self.path is not None:
            return self._MIME_BY_SUFFIX.get(
                self.path.suffix.lower(), "application/octet-stream"
            )
        return "audio/wav"  # default for raw binary data

    def to_audio_content(self) -> AudioContent:
        """Convert to MCP AudioContent.

        Returns:
            An ``AudioContent`` whose ``data`` is the base64 text of the audio
            bytes and whose ``mimeType`` is the type resolved at construction.

        Raises:
            ValueError: If the instance holds neither a path nor data.
        """
        if self.path is not None:
            raw = self.path.read_bytes()
        elif self.data is not None:
            raw = self.data
        else:
            raise ValueError("No audio data available")

        encoded = base64.b64encode(raw).decode()
        return AudioContent(type="audio", data=encoded, mimeType=self._mime_type)
def audio_tool_fn(path: str) -> Audio:
    """Tool used by the audio tests: wrap the file at ``path`` in ``Audio``."""
    clip = Audio(path)
    return clip


# NOTE(review): the two tests below take ``self``; they appear to be methods of
# a test class whose header is outside this excerpt -- confirm placement.
@pytest.mark.anyio
async def test_tool_audio_helper(self, tmp_path: Path):
    """A tool returning ``Audio`` yields one base64-encoded AudioContent."""
    # Write a small stand-in audio file for the tool to read.
    wav_file = tmp_path / "test.wav"
    wav_file.write_bytes(b"fake wav data")

    server = FastMCP()
    server.add_tool(audio_tool_fn)
    async with client_session(server._mcp_server) as client:
        arguments = {"path": str(wav_file)}
        result = await client.call_tool("audio_tool_fn", arguments)
        assert len(result.content) == 1
        block = result.content[0]
        assert isinstance(block, AudioContent)
        assert block.type == "audio"
        assert block.mimeType == "audio/wav"
        # The payload must round-trip through base64 unchanged.
        round_tripped = base64.b64decode(block.data)
        assert round_tripped == b"fake wav data"
        # An Audio return type should NOT produce structured output.
        assert result.structuredContent is None


@pytest.mark.anyio
async def test_tool_mixed_list_with_audio(self, tmp_path: Path):
    """An untyped list mixing Audio with other values converts item by item."""
    # Write a small stand-in audio file for the tool to read.
    wav_file = tmp_path / "test.wav"
    wav_file.write_bytes(b"test audio data")

    def mixed_list_fn() -> list:
        items = [
            "text message",
            Audio(wav_file),
            {"key": "value"},
            TextContent(type="text", text="direct content"),
        ]
        return items

    server = FastMCP()
    server.add_tool(mixed_list_fn)
    async with client_session(server._mcp_server) as client:
        result = await client.call_tool("mixed_list_fn", {})
        assert len(result.content) == 4
        text_block, audio_block, dict_block, direct_block = result.content
        # Plain string becomes TextContent.
        assert isinstance(text_block, TextContent)
        assert text_block.text == "text message"
        # Audio helper becomes AudioContent carrying the file bytes.
        assert isinstance(audio_block, AudioContent)
        assert audio_block.mimeType == "audio/wav"
        assert base64.b64decode(audio_block.data) == b"test audio data"
        # Dict is serialized into TextContent.
        assert isinstance(dict_block, TextContent)
        assert '"key": "value"' in dict_block.text
        # Ready-made TextContent passes through.
        assert isinstance(direct_block, TextContent)
        assert direct_block.text == "direct content"
        # An untyped list with Audio objects should NOT produce structured output.
        assert result.structuredContent is None