From 91e7baddb795fc8e7e545f9ea8723d34a207afc4 Mon Sep 17 00:00:00 2001 From: akshatvishu Date: Tue, 4 Nov 2025 18:34:13 +0530 Subject: [PATCH 1/8] fix(audio): Normalize 'x-wav' audio format to 'wav' --- dspy/adapters/types/audio.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dspy/adapters/types/audio.py b/dspy/adapters/types/audio.py index f39d60bda7..8f723e6fde 100644 --- a/dspy/adapters/types/audio.py +++ b/dspy/adapters/types/audio.py @@ -61,6 +61,10 @@ def from_url(cls, url: str) -> "Audio": if not mime_type.startswith("audio/"): raise ValueError(f"Unsupported MIME type for audio: {mime_type}") audio_format = mime_type.split("/")[1] + + if audio_format == "x-wav": + audio_format == "wav" + encoded_data = base64.b64encode(response.content).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format) @@ -80,6 +84,10 @@ def from_file(cls, file_path: str) -> "Audio": file_data = file.read() audio_format = mime_type.split("/")[1] + + if audio_format == "x-wav": + audio_format == "wav" + encoded_data = base64.b64encode(file_data).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format) @@ -126,6 +134,8 @@ def encode_audio(audio: Union[str, bytes, dict, "Audio", Any], sampling_rate: in header, b64data = audio.split(",", 1) mime = header.split(";")[0].split(":")[1] audio_format = mime.split("/")[1] + if audio_format == "x-wav": + audio_format == "wav" return {"data": b64data, "audio_format": audio_format} except Exception as e: raise ValueError(f"Malformed audio data URI: {e}") From 28eebf6f4a03f87b696de3a09e568988ad3a49f6 Mon Sep 17 00:00:00 2001 From: akshatvishu Date: Wed, 5 Nov 2025 16:40:43 +0530 Subject: [PATCH 2/8] fix(audio): Normalize all 'x-' prefixed audio formats to their standard equivalents --- dspy/adapters/types/audio.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/dspy/adapters/types/audio.py b/dspy/adapters/types/audio.py index 8f723e6fde..6f8dd2910b 100644 --- a/dspy/adapters/types/audio.py +++ b/dspy/adapters/types/audio.py @@ -62,8 +62,8 @@ def from_url(cls, url: str) -> "Audio": raise ValueError(f"Unsupported MIME type for audio: {mime_type}") audio_format = mime_type.split("/")[1] - if audio_format == "x-wav": - audio_format == "wav" + if "x-" in audio_format: + audio_format = audio_format.replace("x-", "") encoded_data = base64.b64encode(response.content).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format) @@ -85,8 +85,8 @@ def from_file(cls, file_path: str) -> "Audio": audio_format = mime_type.split("/")[1] - if audio_format == "x-wav": - audio_format == "wav" + if "x-" in audio_format: + audio_format = audio_format.replace("x-", "") encoded_data = base64.b64encode(file_data).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format) @@ -134,8 +134,10 @@ def encode_audio(audio: Union[str, bytes, dict, "Audio", Any], sampling_rate: in header, b64data = audio.split(",", 1) mime = header.split(";")[0].split(":")[1] audio_format = mime.split("/")[1] - if audio_format == "x-wav": - audio_format == "wav" + + if "x-" in audio_format: + audio_format = audio_format.replace("x-", "") + return {"data": b64data, "audio_format": audio_format} except Exception as e: raise ValueError(f"Malformed audio data URI: {e}") From 9a104be2b07ed5f5b0fb2e4a6f9789c3093d6b73 Mon Sep 17 00:00:00 2001 From: akshatvishu Date: Thu, 6 Nov 2025 19:34:53 +0530 Subject: [PATCH 3/8] refactor(audio): use removeprefix for safer audio format normalization --- dspy/adapters/types/audio.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/dspy/adapters/types/audio.py b/dspy/adapters/types/audio.py index 6f8dd2910b..330f8a7fd0 100644 --- a/dspy/adapters/types/audio.py +++ b/dspy/adapters/types/audio.py @@ -62,8 +62,7 @@ def from_url(cls, url: str) -> "Audio": raise ValueError(f"Unsupported MIME type for audio: {mime_type}") audio_format = mime_type.split("/")[1] - if "x-" in audio_format: - audio_format = audio_format.replace("x-", "") + audio_format = audio_format.removeprefix("x-") encoded_data = base64.b64encode(response.content).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format) @@ -85,8 +84,7 @@ def from_file(cls, file_path: str) -> "Audio": audio_format = mime_type.split("/")[1] - if "x-" in audio_format: - audio_format = audio_format.replace("x-", "") + audio_format = audio_format.removeprefix("x-") encoded_data = base64.b64encode(file_data).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format) @@ -135,8 +133,7 @@ def encode_audio(audio: Union[str, bytes, dict, "Audio", Any], sampling_rate: in mime = header.split(";")[0].split(":")[1] audio_format = mime.split("/")[1] - if "x-" in audio_format: - audio_format = audio_format.replace("x-", "") + audio_format = audio_format.removeprefix("x-") return {"data": b64data, "audio_format": audio_format} except Exception as e: From ea712f24b03cd9692921ff25d17eb136937a13b1 Mon Sep 17 00:00:00 2001 From: akshatvishu Date: Thu, 6 Nov 2025 20:59:17 +0530 Subject: [PATCH 4/8] refactor(audio): centralize audio format normalization logic --- dspy/adapters/types/audio.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dspy/adapters/types/audio.py b/dspy/adapters/types/audio.py index 330f8a7fd0..7739337424 100644 --- a/dspy/adapters/types/audio.py +++ b/dspy/adapters/types/audio.py @@ -17,6 +17,11 @@ SF_AVAILABLE = False +def _normalize_audio_format(audio_format: str) -> str: + """Removes 'x-' prefixes from audio format strings.""" + return audio_format.removeprefix("x-") + + class Audio(Type): data: str audio_format: str @@ -62,7 +67,7 @@ def from_url(cls, url: str) -> "Audio": raise ValueError(f"Unsupported MIME type for audio: {mime_type}") audio_format = mime_type.split("/")[1] - audio_format = audio_format.removeprefix("x-") + audio_format = _normalize_audio_format(audio_format) # <-- Use the helper encoded_data = base64.b64encode(response.content).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format) @@ -84,7 +89,7 @@ def from_file(cls, file_path: str) -> "Audio": audio_format = mime_type.split("/")[1] - audio_format = audio_format.removeprefix("x-") + audio_format = _normalize_audio_format(audio_format) # <-- Use the helper encoded_data = base64.b64encode(file_data).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format) @@ -133,7 +138,7 @@ def encode_audio(audio: Union[str, bytes, dict, "Audio", Any], sampling_rate: in mime = header.split(";")[0].split(":")[1] audio_format = mime.split("/")[1] - audio_format = audio_format.removeprefix("x-") + audio_format = _normalize_audio_format(audio_format) return {"data": b64data, "audio_format": audio_format} except Exception as e: From f17ac7fb95412abee44689b4e03bdb9db9421300 Mon Sep 17 00:00:00 2001 From: akshatvishu Date: Thu, 6 Nov 2025 21:00:33 +0530 Subject: [PATCH 5/8] test: add unit tests for audio format normalization --- tests/adapters/test_audio.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/adapters/test_audio.py diff --git a/tests/adapters/test_audio.py b/tests/adapters/test_audio.py new file mode 100644 index 0000000000..5651f9e5f1 --- /dev/null +++ b/tests/adapters/test_audio.py @@ -0,0 +1,33 @@ +import pytest + +# Import the new helper function from audio.py +from dspy.adapters.types.audio import _normalize_audio_format + + +@pytest.mark.parametrize( + "input_format, expected_format", + [ + # Case 1: Standard format (no change) + ("wav", "wav"), + ("mp3", "mp3"), + + # Case 2: The 'x-' prefix (the logic you changed) + ("x-wav", "wav"), + ("x-mp3", "mp3"), + ("x-flac", "flac"), + + # Case 3: The edge case ('x-' in the middle, proves 'removeprefix' is better) + ("my-x-format", "my-x-format"), + ("x-my-format", "my-format"), + + # Case 4: Empty string and edge cases + ("", ""), + ("x-", ""), + ], +) +def test_normalize_audio_format(input_format, expected_format): + """ + Tests that the _normalize_audio_format helper correctly removes 'x-' prefixes. + This single test covers the logic for from_url, from_file, and encode_audio. + """ + assert _normalize_audio_format(input_format) == expected_format From 2588c6ccb1df37061ee67b4c414775ad69ee9b8c Mon Sep 17 00:00:00 2001 From: akshatvishu Date: Thu, 6 Nov 2025 21:04:40 +0530 Subject: [PATCH 6/8] test/audio: update audio format normalization tests with comprehensive cases --- tests/adapters/test_audio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/adapters/test_audio.py b/tests/adapters/test_audio.py index 5651f9e5f1..6aee2c586f 100644 --- a/tests/adapters/test_audio.py +++ b/tests/adapters/test_audio.py @@ -11,12 +11,12 @@ ("wav", "wav"), ("mp3", "mp3"), - # Case 2: The 'x-' prefix (the logic you changed) + # Case 2: The 'x-' prefix ("x-wav", "wav"), ("x-mp3", "mp3"), ("x-flac", "flac"), - # Case 3: The edge case ('x-' in the middle, proves 'removeprefix' is better) + # Case 3: The edge case ("my-x-format", "my-x-format"), ("x-my-format", "my-format"), From eeb5bfab098e8fd83034be999d6f5421a1a452c5 Mon Sep 17 00:00:00 2001 From: akshatvishu Date: Thu, 6 Nov 2025 21:07:43 +0530 Subject: [PATCH 7/8] style: clean up test file comments --- tests/adapters/test_audio.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/adapters/test_audio.py b/tests/adapters/test_audio.py index 6aee2c586f..b7597fbd29 100644 --- a/tests/adapters/test_audio.py +++ b/tests/adapters/test_audio.py @@ -1,6 +1,5 @@ import pytest -# Import the new helper function from audio.py from dspy.adapters.types.audio import _normalize_audio_format From 9a86f4a7dd72ea3b5ad94af5e4752d0131b0352b Mon Sep 17 00:00:00 2001 From: akshatvishu Date: Thu, 6 Nov 2025 21:09:21 +0530 Subject: [PATCH 8/8] style: fix spelling error at audio.py --- dspy/adapters/types/audio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspy/adapters/types/audio.py b/dspy/adapters/types/audio.py index 7739337424..0ceb734b73 100644 --- a/dspy/adapters/types/audio.py +++ b/dspy/adapters/types/audio.py @@ -67,7 +67,7 @@ def from_url(cls, url: str) -> "Audio": raise ValueError(f"Unsupported MIME type for audio: {mime_type}") audio_format = mime_type.split("/")[1] - audio_format = _normalize_audio_format(audio_format) # <-- Use the helper + audio_format = _normalize_audio_format(audio_format) encoded_data = base64.b64encode(response.content).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format) @@ -89,7 +89,7 @@ def from_file(cls, file_path: str) -> "Audio": audio_format = mime_type.split("/")[1] - audio_format = _normalize_audio_format(audio_format) # <-- Use the helper + audio_format = _normalize_audio_format(audio_format) encoded_data = base64.b64encode(file_data).decode("utf-8") return cls(data=encoded_data, audio_format=audio_format)