Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion OpenAI-DotNet/Extensions/RealtimeServerEventConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ _ when type.StartsWith("conversation.item.input_audio_transcription") => root.De
"input_audio_buffer.committed" => root.Deserialize<InputAudioBufferCommittedResponse>(options),
"input_audio_buffer.cleared" => root.Deserialize<InputAudioBufferClearedResponse>(options),
"input_audio_buffer.speech_started" => root.Deserialize<InputAudioBufferStartedResponse>(options),
"input_audio_buffer.speech_stopped" => root.Deserialize<InputAudioBufferStoppedResponse>(options),
"input_audio_buffer.speech_stopped" => root.Deserialize<InputAudioBufferStoppedResponse>(options),
"output_audio_buffer.started" => root.Deserialize<OutputAudioBufferStartedResponse>(options),
_ when type.StartsWith("response.audio_transcript") => root.Deserialize<ResponseAudioTranscriptResponse>(options),
_ when type.StartsWith("response.audio") => root.Deserialize<ResponseAudioResponse>(),
_ when type.StartsWith("response.content_part") => root.Deserialize<ResponseContentPartResponse>(options),
Expand Down
1 change: 1 addition & 0 deletions OpenAI-DotNet/OpenAI-DotNet.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -458,5 +458,6 @@ Version 4.4.0
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="SIPSorcery" Version="8.0.14" />
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably need to remove this before publishing.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It won't be possible to use WebRTC without it.

I do understand if you'd prefer to keep dependencies down and that was what I was getting at in the previous discussion.

The alternative would be a new separate package under RageAgainstThePixel or SIPSorcery.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me dive into the specifics and see if there is a way to sort this out in a way that makes sense and is easy to use.

Copy link
Member

@StephenHodgson StephenHodgson May 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have an existing production project that uses SIPSorcery and this library; I'll fiddle with it to see what I can come up with while upgrading it from websockets to WebRTC.

</ItemGroup>
</Project>
7 changes: 5 additions & 2 deletions OpenAI-DotNet/OpenAIClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ public OpenAIClient(OpenAIAuthentication openAIAuthentication = null, OpenAIClie
AssistantsEndpoint = new AssistantsEndpoint(this);
BatchEndpoint = new BatchEndpoint(this);
VectorStoresEndpoint = new VectorStoresEndpoint(this);
RealtimeEndpoint = new RealtimeEndpoint(this);
RealtimeEndpoint = new RealtimeEndpoint(this);
RealtimeEndpointWebRTC = new RealtimeEndpointWebRTC(this);
}

~OpenAIClient() => Dispose(false);
Expand Down Expand Up @@ -219,7 +220,9 @@ private void Dispose(bool disposing)
/// </summary>
public VectorStoresEndpoint VectorStoresEndpoint { get; }

public RealtimeEndpoint RealtimeEndpoint { get; }
public RealtimeEndpoint RealtimeEndpoint { get; }

public RealtimeEndpointWebRTC RealtimeEndpointWebRTC { get; }

#endregion Endpoints

Expand Down
33 changes: 33 additions & 0 deletions OpenAI-DotNet/Realtime/OutputAudioBufferStartedResponse.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System.Text.Json.Serialization;

namespace OpenAI.Realtime
{
    /// <summary>
    /// Server event received when the model's output audio buffer starts playing
    /// ("output_audio_buffer.started"; emitted on WebRTC sessions).
    /// </summary>
    public sealed class OutputAudioBufferStartedResponse : BaseRealtimeEvent, IServerEvent
    {
        /// <inheritdoc />
        [JsonInclude]
        [JsonPropertyName("event_id")]
        public override string EventId { get; internal set; }

        /// <inheritdoc />
        [JsonInclude]
        [JsonPropertyName("type")]
        public override string Type { get; protected set; }

        /// <summary>
        /// Milliseconds offset at which audio output started.
        /// NOTE(review): this summary and property look copy-pasted from
        /// InputAudioBufferStartedResponse ("speech was detected"); per the OpenAI
        /// Realtime API reference, "output_audio_buffer.started" carries a
        /// "response_id" rather than "audio_start_ms" — confirm this field is
        /// actually populated by the server.
        /// </summary>
        [JsonInclude]
        [JsonPropertyName("audio_start_ms")]
        public int AudioStartMs { get; private set; }

        /// <summary>
        /// The ID of the item associated with this event.
        /// NOTE(review): likely copy-pasted from the input speech_started event
        /// ("user message item that will be created when speech stops") — verify
        /// against the "output_audio_buffer.started" payload.
        /// </summary>
        [JsonInclude]
        [JsonPropertyName("item_id")]
        public string ItemId { get; private set; }
    }
}
200 changes: 200 additions & 0 deletions OpenAI-DotNet/Realtime/RealtimeEndpointWebRTC.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using OpenAI.Extensions;
using SIPSorcery.Media;
using SIPSorcery.Net;
using SIPSorceryMedia.Abstractions;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace OpenAI.Realtime
{
    /// <summary>
    /// Realtime endpoint that negotiates a WebRTC peer connection (via SIPSorcery)
    /// with the OpenAI realtime service: SDP offer/answer is exchanged over HTTP,
    /// audio flows over RTP, and JSON events flow over the "oai-events" data channel.
    /// </summary>
    public sealed class RealtimeEndpointWebRTC : OpenAIBaseEndpoint
    {
        // Well-known data channel label the OpenAI realtime service uses for JSON events.
        private const string OPENAI_DATACHANNEL_NAME = "oai-events";

        public readonly AudioEncoder AudioEncoder;

        public readonly AudioFormat AudioFormat;

        internal RealtimeEndpointWebRTC(OpenAIClient client) : base(client)
        {
            AudioEncoder = new AudioEncoder(includeOpus: true);
            AudioFormat = AudioEncoder.SupportedFormats.Single(x => x.FormatName == AudioCodecsEnum.OPUS.ToString());
        }

        protected override string Root => "realtime";

        protected override bool? IsWebSocketEndpoint => false;

        private RTCPeerConnection rtcPeerConnection;

        /// <summary>Raised for every RTP packet received from the remote peer.</summary>
        public event Action<IPEndPoint, SDPMediaTypesEnum, RTPPacket> OnRtpPacketReceived;

        /// <summary>Raised when the event data channel opens (the session is usable).</summary>
        public event Action OnPeerConnectionConnected;

        /// <summary>Raised when the peer connection closes, fails, or disconnects, or the data channel closes.</summary>
        public event Action OnPeerConnectionClosedOrFailed;

        /// <summary>
        /// Creates a new realtime session with the provided <see cref="SessionConfiguration"/> options.
        /// </summary>
        /// <param name="configuration"><see cref="SessionConfiguration"/>.</param>
        /// <param name="rtcConfiguration">Optional, <see cref="RTCConfiguration"/> for the peer connection (ICE servers, etc.).</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns><see cref="RealtimeSessionWebRTC"/>.</returns>
        public async Task<RealtimeSessionWebRTC> CreateSessionAsync(SessionConfiguration configuration = null, RTCConfiguration rtcConfiguration = null, CancellationToken cancellationToken = default)
        {
            rtcPeerConnection = await CreatePeerConnection(rtcConfiguration).ConfigureAwait(false);
            var session = new RealtimeSessionWebRTC(rtcPeerConnection, EnableDebug);
            // RunContinuationsAsynchronously prevents the awaiter from resuming inline
            // on the event-callback thread that completes the TCS.
            var sessionCreatedTcs = new TaskCompletionSource<SessionResponse>(TaskCreationOptions.RunContinuationsAsynchronously);

            try
            {
                session.OnEventReceived += OnEventReceived;
                session.OnError += OnError;
                var offerSdp = rtcPeerConnection.createOffer();
                // Fix: flow the caller's cancellation token through the SDP exchange.
                var answerSdp = await SendSdpAsync(configuration?.Model, offerSdp.sdp, cancellationToken).ConfigureAwait(false);
                var setAnswerResult = rtcPeerConnection.setRemoteDescription(
                    new RTCSessionDescriptionInit { sdp = answerSdp, type = RTCSdpType.answer }
                );

                if (setAnswerResult != SetDescriptionResultEnum.OK)
                {
                    sessionCreatedTcs.TrySetException(new Exception("WebRTC SDP negotiation failed"));
                }

                // Wait for the server's "session.created" event before configuring the session.
                var sessionResponse = await sessionCreatedTcs.Task.WithCancellation(cancellationToken).ConfigureAwait(false);
                session.Configuration = sessionResponse.SessionConfiguration;
                await session.SendAsync(new UpdateSessionRequest(configuration), cancellationToken: cancellationToken).ConfigureAwait(false);
            }
            finally
            {
                session.OnError -= OnError;
                session.OnEventReceived -= OnEventReceived;
            }

            return session;

            void OnError(Exception e)
            {
                // Fix: TrySetException — SetException throws InvalidOperationException
                // if a racing event has already completed the TCS.
                sessionCreatedTcs.TrySetException(e);
            }

            void OnEventReceived(IRealtimeEvent @event)
            {
                try
                {
                    switch (@event)
                    {
                        case RealtimeConversationResponse:
                            // Gate debug output on EnableDebug, consistent with the rest of the class.
                            if (EnableDebug)
                            {
                                Console.WriteLine("[conversation.created]");
                            }
                            break;
                        case SessionResponse sessionResponse:
                            if (sessionResponse.Type == "session.created")
                            {
                                sessionCreatedTcs.TrySetResult(sessionResponse);
                            }
                            break;
                        case RealtimeEventError realtimeEventError:
                            sessionCreatedTcs.TrySetException(new Exception(realtimeEventError.Error.Message));
                            break;
                    }
                }
                catch (Exception e)
                {
                    if (EnableDebug)
                    {
                        Console.WriteLine(e);
                    }

                    sessionCreatedTcs.TrySetException(e);
                }
            }
        }

        /// <summary>
        /// Builds the <see cref="RTCPeerConnection"/> with a send/receive Opus audio track
        /// and the OpenAI event data channel, wiring connection-state events through to this endpoint.
        /// </summary>
        /// <param name="pcConfig">Optional peer connection configuration (ICE servers, etc.).</param>
        private async Task<RTCPeerConnection> CreatePeerConnection(RTCConfiguration pcConfig)
        {
            var peerConnection = new RTCPeerConnection(pcConfig);
            var audioTrack = new MediaStreamTrack(AudioFormat, MediaStreamStatusEnum.SendRecv);
            peerConnection.addTrack(audioTrack);

            var dataChannel = await peerConnection.createDataChannel(OPENAI_DATACHANNEL_NAME).ConfigureAwait(false);

            if (EnableDebug)
            {
                peerConnection.onconnectionstatechange += state => Console.WriteLine($"Peer connection connected changed to {state}.");
                peerConnection.OnTimeout += mediaType => Console.WriteLine($"Timeout on media {mediaType}.");
                peerConnection.oniceconnectionstatechange += state => Console.WriteLine($"ICE connection state changed to {state}.");

                peerConnection.onsignalingstatechange += () =>
                {
                    if (peerConnection.signalingState == RTCSignalingState.have_local_offer)
                    {
                        Console.WriteLine($"Local SDP:\n{peerConnection.localDescription.sdp}");
                    }
                    else if (peerConnection.signalingState is RTCSignalingState.have_remote_offer or RTCSignalingState.stable)
                    {
                        Console.WriteLine($"Remote SDP:\n{peerConnection.remoteDescription?.sdp}");
                    }
                };
            }

            peerConnection.OnRtpPacketReceived += (ep, mt, rtp) => OnRtpPacketReceived?.Invoke(ep, mt, rtp);

            peerConnection.onconnectionstatechange += state =>
            {
                if (state is RTCPeerConnectionState.closed or
                    RTCPeerConnectionState.failed or
                    RTCPeerConnectionState.disconnected)
                {
                    OnPeerConnectionClosedOrFailed?.Invoke();
                }
            };

            // The session is only usable once the event data channel is open,
            // so "connected" is signaled from the channel rather than ICE state.
            dataChannel.onopen += () => OnPeerConnectionConnected?.Invoke();

            dataChannel.onclose += () => OnPeerConnectionClosedOrFailed?.Invoke();

            return peerConnection;
        }

        /// <summary>
        /// Sends an encoded audio sample to the remote peer, if the connection is currently established.
        /// Silently no-ops when there is no connected peer connection.
        /// </summary>
        /// <param name="durationRtpUnits">Sample duration in RTP timestamp units.</param>
        /// <param name="sample">The encoded audio payload.</param>
        public void SendAudio(uint durationRtpUnits, byte[] sample)
        {
            if (rtcPeerConnection is { connectionState: RTCPeerConnectionState.connected })
            {
                rtcPeerConnection.SendAudio(durationRtpUnits, sample);
            }
        }

        /// <summary>
        /// Posts the local SDP offer to the realtime endpoint and returns the remote SDP answer.
        /// </summary>
        /// <param name="model">Optional model (or Azure deployment) name; defaults to <see cref="Models.Model.GPT4oRealtime"/>.</param>
        /// <param name="offerSdp">The local SDP offer payload.</param>
        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
        /// <returns>The SDP answer returned by the server.</returns>
        /// <exception cref="Exception">Thrown when the server responds with a non-success status code.</exception>
        public async Task<string> SendSdpAsync(string model, string offerSdp, CancellationToken cancellationToken = default)
        {
            model = string.IsNullOrWhiteSpace(model) ? Models.Model.GPT4oRealtime : model;
            var queryParameters = new Dictionary<string, string>();

            // Azure OpenAI addresses deployments by name; the public API addresses models.
            if (client.OpenAIClientSettings.IsAzureOpenAI)
            {
                queryParameters["deployment"] = model;
            }
            else
            {
                queryParameters["model"] = model;
            }

            var content = new StringContent(offerSdp, Encoding.UTF8);
            content.Headers.ContentType = new MediaTypeHeaderValue("application/sdp");

            // Fix: build the url once and reuse it (was computed and then ignored).
            var url = GetUrl(queryParameters: queryParameters);
            using var response = await client.Client.PostAsync(url, content, cancellationToken).ConfigureAwait(false);

            if (!response.IsSuccessStatusCode)
            {
                var errorBody = await response.Content.ReadAsStringAsync(cancellationToken).ConfigureAwait(false);
                throw new Exception($"Error sending SDP offer {errorBody}");
            }

            var sdpAnswer = await response.ReadAsStringAsync(EnableDebug, content, cancellationToken).ConfigureAwait(false);
            return sdpAnswer;
        }
    }
}
Loading