Skip to content

Commit 329806a

Browse files
dudududukim and seratch authored
fix(voice/stt): accept conversation.item.input_audio_transcription.completed (keep legacy alias) (#1537)
Co-authored-by: Kazuhiro Sera <[email protected]>
1 parent 50a909a commit 329806a

File tree

2 files changed

+28
-12
lines changed

2 files changed

+28
-12
lines changed

src/agents/voice/models/openai_stt.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,10 @@ async def _handle_events(self) -> None:
226226
break
227227

228228
event_type = event.get("type", "unknown")
229-
if event_type == "input_audio_transcription_completed":
229+
if event_type in [
230+
"input_audio_transcription_completed", # legacy
231+
"conversation.item.input_audio_transcription.completed",
232+
]:
230233
transcript = cast(str, event.get("transcript", ""))
231234
if len(transcript) > 0:
232235
self._end_turn(transcript)

tests/voice/test_openai_stt.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -184,22 +184,35 @@ async def test_stream_audio_sends_correct_json():
184184

185185

186186
@pytest.mark.asyncio
187-
async def test_transcription_event_puts_output_in_queue():
187+
@pytest.mark.parametrize(
188+
"created,updated,completed",
189+
[
190+
(
191+
{"type": "transcription_session.created"},
192+
{"type": "transcription_session.updated"},
193+
{"type": "input_audio_transcription_completed", "transcript": "Hello world!"},
194+
),
195+
(
196+
{"type": "session.created"},
197+
{"type": "session.updated"},
198+
{
199+
"type": "conversation.item.input_audio_transcription.completed",
200+
"transcript": "Hello world!",
201+
},
202+
),
203+
],
204+
)
205+
async def test_transcription_event_puts_output_in_queue(created, updated, completed):
188206
"""
189-
Test that a 'input_audio_transcription_completed' event
207+
Test that a 'input_audio_transcription_completed' event and
208+
'conversation.item.input_audio_transcription.completed'
190209
yields a transcript from transcribe_turns().
191210
"""
192211
mock_ws = create_mock_websocket(
193212
[
194-
json.dumps({"type": "transcription_session.created"}),
195-
json.dumps({"type": "transcription_session.updated"}),
196-
# Once configured, we mock a completed transcription event:
197-
json.dumps(
198-
{
199-
"type": "input_audio_transcription_completed",
200-
"transcript": "Hello world!",
201-
}
202-
),
213+
json.dumps(created),
214+
json.dumps(updated),
215+
json.dumps(completed),
203216
]
204217
)
205218

0 commit comments

Comments
 (0)