-
-
Notifications
You must be signed in to change notification settings - Fork 160
OpenAI-Dotnet 8.x #446
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
OpenAI-Dotnet 8.x #446
Changes from all commits
4537219
84d6a49
5041754
b7fa4ba
dd08896
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -458,5 +458,6 @@ Version 4.4.0 | |
<PrivateAssets>all</PrivateAssets> | ||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> | ||
</PackageReference> | ||
<PackageReference Include="SIPSorcery" Version="8.0.14" /> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably need to remove this before publishing. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It won't be possible to use WebRTC without it. I do understand if you'd prefer to keep dependencies down and that was what I was getting at in the previous discussion. The alternative would be a new separate package under RageAgainstThePixel or SIPSorcery. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let me dive into the specifics and see if there is a way to sort this out in a way that makes sense and is easy to use. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have an existing production project that uses SIPSorcery and this library. I'll fiddle with it to see what I can come up with while upgrading it from websockets to WebRTC. |
||
</ItemGroup> | ||
</Project> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// Licensed under the MIT License. See LICENSE in the project root for license information. | ||
|
||
using System.Text.Json.Serialization; | ||
|
||
namespace OpenAI.Realtime | ||
{ | ||
/// <summary>
/// Server event describing the start of an output audio buffer.
/// NOTE(review): the class name suggests this maps to the <c>output_audio_buffer.started</c>
/// server event (WebRTC only) — confirm against the event type registration.
/// </summary>
public sealed class OutputAudioBufferStartedResponse : BaseRealtimeEvent, IServerEvent
{
    /// <inheritdoc />
    [JsonInclude]
    [JsonPropertyName("event_id")]
    public override string EventId { get; internal set; }

    /// <inheritdoc />
    [JsonInclude]
    [JsonPropertyName("type")]
    public override string Type { get; protected set; }

    /// <summary>
    /// The unique ID of the response that produced the audio.
    /// </summary>
    [JsonInclude]
    [JsonPropertyName("response_id")]
    public string ResponseId { get; private set; }

    /// <summary>
    /// Value of the <c>audio_start_ms</c> field, if the server sends one.
    /// NOTE(review): this property and its original description appear to be copied from the
    /// <c>input_audio_buffer.speech_started</c> event; the output buffer event is documented
    /// with only <c>response_id</c>, so this will likely stay at its default — confirm.
    /// Kept for backward compatibility.
    /// </summary>
    [JsonInclude]
    [JsonPropertyName("audio_start_ms")]
    public int AudioStartMs { get; private set; }

    /// <summary>
    /// Value of the <c>item_id</c> field, if the server sends one.
    /// NOTE(review): likewise appears to be copied from the input speech-started event and is
    /// probably never populated for this event — confirm. Kept for backward compatibility.
    /// </summary>
    [JsonInclude]
    [JsonPropertyName("item_id")]
    public string ItemId { get; private set; }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
// Licensed under the MIT License. See LICENSE in the project root for license information. | ||
|
||
using OpenAI.Extensions; | ||
using SIPSorcery.Media; | ||
using SIPSorcery.Net; | ||
using SIPSorceryMedia.Abstractions; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Net; | ||
using System.Net.Http; | ||
using System.Net.Http.Headers; | ||
using System.Text; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
|
||
namespace OpenAI.Realtime | ||
{ | ||
public sealed class RealtimeEndpointWebRTC : OpenAIBaseEndpoint | ||
{ | ||
private const string OPENAI_DATACHANNEL_NAME = "oai-events"; | ||
|
||
public readonly AudioEncoder AudioEncoder; | ||
|
||
public readonly AudioFormat AudioFormat; | ||
|
||
/// <summary>
/// Initializes the endpoint, creating an Opus-enabled <see cref="AudioEncoder"/> and
/// selecting the single supported format whose name matches the Opus codec.
/// </summary>
/// <param name="client">The <see cref="OpenAIClient"/> passed to the base endpoint.</param>
internal RealtimeEndpointWebRTC(OpenAIClient client)
    : base(client)
{
    var opusFormatName = AudioCodecsEnum.OPUS.ToString();
    AudioEncoder = new AudioEncoder(includeOpus: true);
    // Single() throws if the encoder exposes zero or more than one matching format.
    AudioFormat = AudioEncoder.SupportedFormats.Single(format => format.FormatName == opusFormatName);
}
|
||
protected override string Root => "realtime"; | ||
|
||
protected override bool? IsWebSocketEndpoint => false; | ||
|
||
private RTCPeerConnection rtcPeerConnection; | ||
|
||
public event Action<IPEndPoint, SDPMediaTypesEnum, RTPPacket> OnRtpPacketReceived; | ||
|
||
public event Action OnPeerConnectionConnected; | ||
|
||
public event Action OnPeerConnectionClosedOrFailed; | ||
|
||
/// <summary>
/// Creates a new realtime session over WebRTC with the provided <see cref="SessionConfiguration"/> options.
/// </summary>
/// <remarks>
/// Creates the peer connection, posts the local SDP offer, applies the returned SDP answer,
/// waits for the <c>session.created</c> event and then sends an <see cref="UpdateSessionRequest"/>
/// built from <paramref name="configuration"/>.
/// </remarks>
/// <param name="configuration">Optional, <see cref="SessionConfiguration"/>.</param>
/// <param name="rtcConfiguration">Optional, <see cref="RTCConfiguration"/> used to construct the <see cref="RTCPeerConnection"/>.</param>
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns><see cref="RealtimeSessionWebRTC"/>.</returns>
/// <exception cref="Exception">
/// Thrown when SDP negotiation fails, the session reports an error, or the server sends an error event
/// before the session is created.
/// </exception>
public async Task<RealtimeSessionWebRTC> CreateSessionAsync(SessionConfiguration configuration = null, RTCConfiguration rtcConfiguration = null, CancellationToken cancellationToken = default)
{
    var peerConnection = await CreatePeerConnection(rtcConfiguration).ConfigureAwait(false);
    rtcPeerConnection = peerConnection;
    var session = new RealtimeSessionWebRTC(peerConnection, EnableDebug);
    // Run continuations asynchronously so the awaiting code does not execute inline on
    // whichever thread raises the session event that completes the task.
    var sessionCreatedTcs = new TaskCompletionSource<SessionResponse>(TaskCreationOptions.RunContinuationsAsynchronously);

    try
    {
        session.OnEventReceived += OnEventReceived;
        session.OnError += OnError;
        var offerSdp = peerConnection.createOffer();
        var answerSdp = await SendSdpAsync(configuration?.Model, offerSdp.sdp, cancellationToken).ConfigureAwait(false);
        var setAnswerResult = peerConnection.setRemoteDescription(
            new RTCSessionDescriptionInit { sdp = answerSdp, type = RTCSdpType.answer }
        );

        if (setAnswerResult != SetDescriptionResultEnum.OK)
        {
            sessionCreatedTcs.TrySetException(new Exception("WebRTC SDP negotiation failed"));
        }

        var sessionResponse = await sessionCreatedTcs.Task.WithCancellation(cancellationToken).ConfigureAwait(false);
        session.Configuration = sessionResponse.SessionConfiguration;
        await session.SendAsync(new UpdateSessionRequest(configuration), cancellationToken: cancellationToken).ConfigureAwait(false);
    }
    catch
    {
        // The caller never receives the session on failure, so nothing else can release
        // the peer connection; close it here before propagating the error.
        peerConnection.close();
        throw;
    }
    finally
    {
        session.OnError -= OnError;
        session.OnEventReceived -= OnEventReceived;
    }

    return session;

    void OnError(Exception e)
    {
        // TrySetException: the task may already be completed by an earlier event or error.
        sessionCreatedTcs.TrySetException(e);
    }

    void OnEventReceived(IRealtimeEvent @event)
    {
        try
        {
            switch (@event)
            {
                case RealtimeConversationResponse:
                    if (EnableDebug)
                    {
                        Console.WriteLine("[conversation.created]");
                    }
                    break;
                case SessionResponse sessionResponse:
                    if (sessionResponse.Type == "session.created")
                    {
                        sessionCreatedTcs.TrySetResult(sessionResponse);
                    }
                    break;
                case RealtimeEventError realtimeEventError:
                    sessionCreatedTcs.TrySetException(new Exception(realtimeEventError.Error.Message));
                    break;
            }
        }
        catch (Exception e)
        {
            if (EnableDebug)
            {
                Console.WriteLine(e);
            }

            sessionCreatedTcs.TrySetException(e);
        }
    }
}
|
||
/// <summary>
/// Builds an <see cref="RTCPeerConnection"/> with a send/receive audio track in <see cref="AudioFormat"/>
/// and a data channel named <see cref="OPENAI_DATACHANNEL_NAME"/>, then wires the endpoint's public events to it.
/// </summary>
/// <param name="pcConfig">Configuration passed to the <see cref="RTCPeerConnection"/> constructor; may be null.</param>
/// <returns>The configured <see cref="RTCPeerConnection"/>; no SDP offer has been created by this method.</returns>
private async Task<RTCPeerConnection> CreatePeerConnection(RTCConfiguration pcConfig)
{
    var peerConnection = new RTCPeerConnection(pcConfig);
    var audioTrack = new MediaStreamTrack(AudioFormat, MediaStreamStatusEnum.SendRecv);
    peerConnection.addTrack(audioTrack);

    var dataChannel = await peerConnection.createDataChannel(OPENAI_DATACHANNEL_NAME).ConfigureAwait(false);

    if (EnableDebug)
    {
        // Diagnostic logging only; these handlers do not affect connection behaviour.
        peerConnection.onconnectionstatechange += state => Console.WriteLine($"Peer connection connected changed to {state}.");
        peerConnection.OnTimeout += mediaType => Console.WriteLine($"Timeout on media {mediaType}.");
        peerConnection.oniceconnectionstatechange += state => Console.WriteLine($"ICE connection state changed to {state}.");

        peerConnection.onsignalingstatechange += () =>
        {
            if (peerConnection.signalingState == RTCSignalingState.have_local_offer)
            {
                Console.WriteLine($"Local SDP:\n{peerConnection.localDescription.sdp}");
            }
            else if (peerConnection.signalingState is RTCSignalingState.have_remote_offer or RTCSignalingState.stable)
            {
                Console.WriteLine($"Remote SDP:\n{peerConnection.remoteDescription?.sdp}");
            }
        };
    }

    // Relay incoming RTP packets to subscribers of the endpoint-level event.
    peerConnection.OnRtpPacketReceived += (ep, mt, rtp) => OnRtpPacketReceived?.Invoke(ep, mt, rtp);

    // closed, failed and disconnected are all reported through the same endpoint event.
    peerConnection.onconnectionstatechange += (state) =>
    {
        if (state is RTCPeerConnectionState.closed or
            RTCPeerConnectionState.failed or
            RTCPeerConnectionState.disconnected)
        {
            OnPeerConnectionClosedOrFailed?.Invoke();
        }
    };

    // "Connected" is signalled when the data channel opens, not on the peer connection state.
    dataChannel.onopen += () => OnPeerConnectionConnected?.Invoke();

    dataChannel.onclose += () => OnPeerConnectionClosedOrFailed?.Invoke();

    return peerConnection;
}
|
||
/// <summary>
/// Passes an audio sample to the current peer connection.
/// Does nothing when no peer connection exists or it is not in the connected state.
/// </summary>
/// <param name="durationRtpUnits">Duration of the sample in RTP units, passed through unchanged.</param>
/// <param name="sample">The audio sample bytes, passed through unchanged.</param>
public void SendAudio(uint durationRtpUnits, byte[] sample)
{
    // A null connection yields a null state, which also fails the connected check.
    if (rtcPeerConnection?.connectionState != RTCPeerConnectionState.connected)
    {
        return;
    }

    rtcPeerConnection.SendAudio(durationRtpUnits, sample);
}
|
||
/// <summary>
/// Posts a local SDP offer to the realtime endpoint and returns the response body as the SDP answer.
/// </summary>
/// <param name="model">
/// Model (or deployment, when the client is configured for Azure OpenAI) to request.
/// Falls back to <c>Models.Model.GPT4oRealtime</c> when null or whitespace.
/// </param>
/// <param name="offerSdp">The local SDP offer, sent as the request body with content type <c>application/sdp</c>.</param>
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns>The SDP answer text read from the response.</returns>
/// <exception cref="Exception">Thrown when the server responds with a non-success status code.</exception>
public async Task<string> SendSdpAsync(string model, string offerSdp, CancellationToken cancellationToken = default)
{
    model = string.IsNullOrWhiteSpace(model) ? Models.Model.GPT4oRealtime : model;
    var queryParameters = new Dictionary<string, string>();

    if (client.OpenAIClientSettings.IsAzureOpenAI)
    {
        queryParameters["deployment"] = model;
    }
    else
    {
        queryParameters["model"] = model;
    }

    // Dispose the request content once the exchange is finished.
    using var content = new StringContent(offerSdp, Encoding.UTF8);
    content.Headers.ContentType = new MediaTypeHeaderValue("application/sdp");

    var url = GetUrl(queryParameters: queryParameters);
    using var response = await client.Client.PostAsync(url, content, cancellationToken).ConfigureAwait(false);

    if (!response.IsSuccessStatusCode)
    {
        var errorBody = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        throw new Exception($"Error sending SDP offer {errorBody}");
    }

    var sdpAnswer = await response.ReadAsStringAsync(EnableDebug, content, cancellationToken).ConfigureAwait(false);
    return sdpAnswer;
}
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.