diff --git a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs index 5655781c..fe1bcb4d 100644 --- a/EssentialCSharp.Chat.Shared/Services/AIChatService.cs +++ b/EssentialCSharp.Chat.Shared/Services/AIChatService.cs @@ -4,6 +4,7 @@ using ModelContextProtocol.Client; using ModelContextProtocol.Protocol; using OpenAI.Responses; +using System.ClientModel; using System.Collections.Frozen; namespace EssentialCSharp.Chat.Common.Services; @@ -171,7 +172,11 @@ private static string SanitizeForXmlContext(string? input) => input?.Replace("<", "\u2039").Replace(">", "\u203A") ?? string.Empty; /// - /// Processes streaming updates from the OpenAI Responses API, handling both regular responses and function calls + /// Processes streaming updates from the OpenAI Responses API. + /// Buffers all function call items before executing them — this is critical for correctness: + /// if the model emits multiple parallel tool calls, firing a separate continuation per call + /// creates forked conversation branches. Collecting all calls and submitting all outputs + /// in a single continuation matches the non-streaming behavior. /// private async IAsyncEnumerable<(string text, string? responseId)> ProcessStreamingUpdatesAsync( #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. @@ -183,139 +188,151 @@ private static string SanitizeForXmlContext(string? input) => string? endUserId = null, [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default) { - await foreach (var update in streamingUpdates.WithCancellation(cancellationToken)) - { + // Track this leg's response ID so tool-call continuations chain from it, + // ensuring the model's context includes the user's message + reasoning. + string? currentLegResponseId = null; #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + List? pendingFunctionCalls = null; + + // Wrap the raw stream to convert context-length API errors to our domain exception. + // C# does not allow yield inside a try/catch, so error remapping is done in a + // separate helper that puts try/catch only around MoveNextAsync. + await foreach (var update in RethrowContextLengthErrors(streamingUpdates, responseOptions.PreviousResponseId, cancellationToken)) + { if (update is StreamingResponseCreatedUpdate created) { // Emit the response ID early so the controller can record ownership // before the stream completes — handles client disconnects mid-stream. - yield return (string.Empty, responseId: created.Response.Id); + currentLegResponseId = created.Response.Id; + yield return (string.Empty, responseId: currentLegResponseId); } else if (update is StreamingResponseOutputItemDoneUpdate itemDone) { - // Check if this is a function call that needs to be executed if (itemDone.Item is FunctionCallResponseItem functionCallItem && mcpClient != null) { if (toolCallDepth >= 10) throw new InvalidOperationException("Maximum tool call depth exceeded."); - // Execute the function call and stream its response. - // Each nested ProcessStreamingUpdatesAsync emits its own StreamingResponseCreatedUpdate - // (which already yields its responseId early), so no extra tracking is needed here. - await foreach (var functionResult in ExecuteFunctionCallAsync(functionCallItem, responseOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken)) - { - yield return functionResult; - } + // Buffer all function calls — do NOT fire continuations inline. + // Sending one continuation per call would create N forked conversation + // branches, each missing the other N-1 tool results. + pendingFunctionCalls ??= []; + pendingFunctionCalls.Add(functionCallItem); } } else if (update is StreamingResponseOutputTextDeltaUpdate deltaUpdate) { yield return (deltaUpdate.Delta.ToString(), null); } - else if (update is StreamingResponseCompletedUpdate) + // StreamingResponseCompletedUpdate: ResponseId already emitted above — no-op. + } + + // After the stream completes, execute all buffered tool calls and send ALL outputs + // in a single continuation request. This mirrors the non-streaming loop in + // GetChatCompletionCore and avoids conversation branching. + if (pendingFunctionCalls is { Count: > 0 } && mcpClient != null) + { + // Guard: if the API never sent StreamingResponseCreatedUpdate, currentLegResponseId + // is null. Continuing would send FunctionCallOutputResponseItems referencing CallIds + // the server has no record of (PreviousResponseId = null), causing a 400. + if (currentLegResponseId is null) + throw new InvalidOperationException( + "Cannot continue tool-call chain: the streaming leg completed with tool calls but emitted no response ID."); + + var continuationOptions = CloneOptionsWithPreviousResponseId(responseOptions, currentLegResponseId); + var outputItems = new List(pendingFunctionCalls.Count); + + foreach (var functionCallItem in pendingFunctionCalls) { - // ResponseId was already emitted from StreamingResponseCreatedUpdate at the start - // of this response leg — no need to emit again from the completion event. + outputItems.Add(await ExecuteSingleToolCallAsync(functionCallItem, toolCallDepth, endUserId, mcpClient, cancellationToken)); } + + var continuationStream = _ResponseClient.CreateResponseStreamingAsync(outputItems, continuationOptions, cancellationToken); #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + + await foreach (var result in ProcessStreamingUpdatesAsync(continuationStream, continuationOptions, mcpClient, toolCallDepth + 1, endUserId, cancellationToken)) + { + yield return result; + } } } /// - /// Executes a function call and streams the response + /// Wraps a streaming response enumerable to remap + /// context-length errors to . + /// + /// C# prohibits yield return inside a try block with a catch clause + /// (CS1626). By putting the try/catch only around MoveNextAsync and the + /// yield return outside, we satisfy the compiler while still remapping exceptions. + /// /// - private async IAsyncEnumerable<(string text, string? responseId)> ExecuteFunctionCallAsync( #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - FunctionCallResponseItem functionCallItem, - ResponseCreationOptions responseOptions, + private static async IAsyncEnumerable RethrowContextLengthErrors( + IAsyncEnumerable source, + string? previousResponseId, + [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default) + { + await using var enumerator = source.GetAsyncEnumerator(cancellationToken); + while (true) + { + bool hasNext; + try { hasNext = await enumerator.MoveNextAsync(); } + catch (ClientResultException ex) when (IsContextLengthError(ex)) + { throw new ConversationContextLimitExceededException(previousResponseId, ex); } + + if (!hasNext) break; + yield return enumerator.Current; // yield return is outside try/catch — valid + } + } #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - McpClient mcpClient, + + /// + /// Executes a single MCP tool call and returns the output item to include in the + /// continuation request. Handles allowlist validation and argument-parsing errors + /// so a single bad tool never aborts the entire continuation. + /// +#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + private async Task ExecuteSingleToolCallAsync( + FunctionCallResponseItem functionCallItem, int toolCallDepth, - string? endUserId = null, - [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default) + string? endUserId, + McpClient mcpClient, + CancellationToken cancellationToken) { // Defense-in-depth: validate tool name against static allowlist before executing. if (!IsMcpToolAllowed(functionCallItem.FunctionName)) { LogMcpToolCallRejected(_Logger, functionCallItem.FunctionName, endUserId); - // Feed a benign error back to the model so it can recover gracefully, - // mirroring what GetChatCompletionCore does on the non-streaming path. -#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - var errorItems = new List - { - functionCallItem, - new FunctionCallOutputResponseItem( - functionCallItem.CallId, - $"Tool '{functionCallItem.FunctionName}' is not available.") - }; -#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - var recoveryStream = _ResponseClient.CreateResponseStreamingAsync( - errorItems, responseOptions, cancellationToken); - await foreach (var result in ProcessStreamingUpdatesAsync( - recoveryStream, responseOptions, mcpClient, toolCallDepth, endUserId, cancellationToken)) - { - yield return result; - } - yield break; + return new FunctionCallOutputResponseItem( + functionCallItem.CallId, + $"Tool '{functionCallItem.FunctionName}' is not available."); } LogMcpToolCallInvokedStream(_Logger, functionCallItem.FunctionName, toolCallDepth, endUserId); - // A dictionary of arguments to pass to the tool. Each key represents a parameter name, and its associated value represents the argument value. - Dictionary arguments = []; - // example JsonResponse: - // "{\"question\":\"Azure OpenAI Responses API (Preview)\"}" - var jsonResponse = functionCallItem.FunctionArguments.ToString(); - var jsonArguments = System.Text.Json.JsonSerializer.Deserialize>(jsonResponse) ?? new Dictionary(); - - // Convert JsonElement values to their actual types - foreach (var kvp in jsonArguments) + + Dictionary arguments; + try { - if (kvp.Value is System.Text.Json.JsonElement jsonElement) - { - arguments[kvp.Key] = jsonElement.ValueKind switch - { - System.Text.Json.JsonValueKind.String => jsonElement.GetString(), - System.Text.Json.JsonValueKind.Number => jsonElement.GetDecimal(), - System.Text.Json.JsonValueKind.True => true, - System.Text.Json.JsonValueKind.False => false, - System.Text.Json.JsonValueKind.Null => null, - _ => jsonElement.ToString() - }; - } - else - { - arguments[kvp.Key] = kvp.Value; - } + arguments = ParseToolArguments(functionCallItem.FunctionArguments); + } + catch (Exception ex) + { + LogMcpToolArgumentParseError(_Logger, functionCallItem.FunctionName, ex, endUserId); + return new FunctionCallOutputResponseItem( + functionCallItem.CallId, + $"Error parsing arguments for '{functionCallItem.FunctionName}': invalid JSON."); } - // Execute the function call using the MCP client var toolResult = await mcpClient.CallToolAsync( functionCallItem.FunctionName, arguments: arguments, cancellationToken: cancellationToken); - // Create input items with both the function call and the result - // This matches the Python pattern: append both tool_call and result -#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - var inputItems = new List - { - functionCallItem, // The original function call - new FunctionCallOutputResponseItem(functionCallItem.CallId, McpToolResultFormatter.GetModelInput(toolResult)) - }; -#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - - // Stream the function call response using the same processing logic - var functionResponseStream = _ResponseClient.CreateResponseStreamingAsync( - inputItems, - responseOptions, - cancellationToken); - - await foreach (var result in ProcessStreamingUpdatesAsync(functionResponseStream, responseOptions, mcpClient, toolCallDepth, endUserId, cancellationToken)) - { - yield return result; - } + return new FunctionCallOutputResponseItem( + functionCallItem.CallId, + McpToolResultFormatter.GetModelInput(toolResult)); } +#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. /// /// Creates response options with optional features @@ -348,12 +365,11 @@ private async Task CreateResponseOptionsAsync( options.PreviousResponseId = previousResponseId; } - // endUserId is reserved for forwarding to Azure OpenAI for end-user attribution - // (Microsoft Defender prompt-shield correlation). OpenAI .NET SDK v2.7.0 does not - // expose ResponseCreationOptions.User; this parameter is intentionally discarded - // until SDK support is available. + // Wire up end-user ID for Azure OpenAI abuse monitoring and Microsoft Defender + // prompt-shield correlation. The SDK now exposes EndUserId directly. // See: https://learn.microsoft.com/en-us/azure/defender-for-cloud/gain-end-user-context-ai - _ = endUserId; + if (!string.IsNullOrEmpty(endUserId)) + options.EndUserId = endUserId; // Add tools if provided if (tools != null) @@ -378,7 +394,11 @@ private async Task CreateResponseOptionsAsync( } #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. - options.Tools.Add(ResponseTool.CreateFunctionTool(tool.Name, functionDescription: tool.Description, strictModeEnabled: true, functionParameters: BinaryData.FromString(tool.JsonSchema.GetRawText()))); + // strictModeEnabled: false — MCP tool schemas come from external servers and are not + // guaranteed to satisfy OpenAI strict-mode constraints (all properties required, + // additionalProperties: false everywhere). A single non-conforming schema with + // strict mode enabled would cause a 400 at registration time for ALL tools. + options.Tools.Add(ResponseTool.CreateFunctionTool(tool.Name, functionDescription: tool.Description, strictModeEnabled: false, functionParameters: BinaryData.FromString(tool.JsonSchema.GetRawText()))); #pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. } } @@ -412,15 +432,22 @@ private async Task CreateResponseOptionsAsync( // Create the response using the Responses API #pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. List responseItems = [ResponseItem.CreateUserMessageItem(prompt)]; -#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. const int MaxToolCallIterations = 10; for (int iteration = 0; iteration < MaxToolCallIterations; iteration++) { - var response = await _ResponseClient.CreateResponseAsync( - responseItems, - options: responseOptions, - cancellationToken: cancellationToken); + ClientResult response; + try + { + response = await _ResponseClient.CreateResponseAsync( + responseItems, + options: responseOptions, + cancellationToken: cancellationToken); + } + catch (ClientResultException ex) when (IsContextLengthError(ex)) + { + throw new ConversationContextLimitExceededException(responseOptions.PreviousResponseId, ex); + } string responseId = response.Value.Id; @@ -429,6 +456,12 @@ private async Task CreateResponseOptionsAsync( if (functionCalls.Count > 0 && mcpClient != null) { + // Advance the chain: the server now has everything up to responseId stored + // (user message + all prior function calls/results + this response's funcCalls). + // The next request only needs to supply the tool outputs — not the growing history. + responseOptions.PreviousResponseId = responseId; + responseItems = []; + foreach (var functionCallItem in functionCalls) { // Defense-in-depth: validate tool name against static allowlist before executing. @@ -436,8 +469,7 @@ private async Task CreateResponseOptionsAsync( if (!IsMcpToolAllowed(functionCallItem.FunctionName)) { LogMcpToolCallRejected(_Logger, functionCallItem.FunctionName, endUserId); - // Return a benign error to the model so it can respond gracefully - responseItems.Add(functionCallItem); + // The functionCallItem is in the stored response; send only the error output. responseItems.Add(new FunctionCallOutputResponseItem( functionCallItem.CallId, $"Tool '{functionCallItem.FunctionName}' is not available.")); @@ -445,23 +477,19 @@ private async Task CreateResponseOptionsAsync( } LogMcpToolCallInvoked(_Logger, functionCallItem.FunctionName, iteration, endUserId); - var jsonResponse = functionCallItem.FunctionArguments.ToString(); - var jsonArguments = System.Text.Json.JsonSerializer.Deserialize>(jsonResponse) ?? new Dictionary(); - Dictionary arguments = []; - foreach (var kvp in jsonArguments) + Dictionary arguments; + try { - arguments[kvp.Key] = kvp.Value is System.Text.Json.JsonElement jsonElement - ? jsonElement.ValueKind switch - { - System.Text.Json.JsonValueKind.String => jsonElement.GetString(), - System.Text.Json.JsonValueKind.Number => jsonElement.GetDecimal(), - System.Text.Json.JsonValueKind.True => true, - System.Text.Json.JsonValueKind.False => false, - System.Text.Json.JsonValueKind.Null => null, - _ => (object?)jsonElement.ToString() - } - : kvp.Value; + arguments = ParseToolArguments(functionCallItem.FunctionArguments); + } + catch (Exception ex) + { + LogMcpToolArgumentParseError(_Logger, functionCallItem.FunctionName, ex, endUserId); + responseItems.Add(new FunctionCallOutputResponseItem( + functionCallItem.CallId, + $"Error parsing arguments for '{functionCallItem.FunctionName}': invalid JSON.")); + continue; } var toolResult = await mcpClient.CallToolAsync( @@ -469,7 +497,8 @@ private async Task CreateResponseOptionsAsync( arguments: arguments, cancellationToken: cancellationToken); - responseItems.Add(functionCallItem); + // The functionCallItem is stored server-side in the response at PreviousResponseId. + // Only the tool output is new content that needs to be included. responseItems.Add(new FunctionCallOutputResponseItem( functionCallItem.CallId, McpToolResultFormatter.GetModelInput(toolResult))); @@ -501,6 +530,70 @@ private bool IsMcpToolAllowed(string toolName) return _AllowedMcpTools.Contains(toolName); } + /// + /// Returns a clone of with + /// replaced. + /// All behavior-affecting properties are copied so that tool-call continuation legs + /// produce identical generation behavior to the initial leg. + /// +#pragma warning disable OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + private static ResponseCreationOptions CloneOptionsWithPreviousResponseId( + ResponseCreationOptions source, + string? previousResponseId) + { + var clone = new ResponseCreationOptions + { + Instructions = source.Instructions, + PreviousResponseId = previousResponseId, + EndUserId = source.EndUserId, + ReasoningOptions = source.ReasoningOptions, + MaxOutputTokenCount = source.MaxOutputTokenCount, + TextOptions = source.TextOptions, + TruncationMode = source.TruncationMode, + ParallelToolCallsEnabled = source.ParallelToolCallsEnabled, + StoredOutputEnabled = source.StoredOutputEnabled, + ToolChoice = source.ToolChoice, + Temperature = source.Temperature, + TopP = source.TopP, + ServiceTier = source.ServiceTier, + }; + foreach (var tool in source.Tools) + clone.Tools.Add(tool); + if (source.Metadata is { Count: > 0 }) + foreach (var kvp in source.Metadata) + clone.Metadata[kvp.Key] = kvp.Value; + return clone; + } +#pragma warning restore OPENAI001 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed. + + /// + /// Parses function call arguments from a JSON payload into a + /// strongly-typed dictionary, converting values + /// to their native CLR equivalents. + /// + private static Dictionary ParseToolArguments(BinaryData functionArguments) + { + var jsonArguments = System.Text.Json.JsonSerializer.Deserialize>( + functionArguments.ToString()) ?? []; + + var arguments = new Dictionary(jsonArguments.Count); + foreach (var kvp in jsonArguments) + { + arguments[kvp.Key] = kvp.Value is System.Text.Json.JsonElement jsonElement + ? jsonElement.ValueKind switch + { + System.Text.Json.JsonValueKind.String => jsonElement.GetString(), + System.Text.Json.JsonValueKind.Number => jsonElement.GetDecimal(), + System.Text.Json.JsonValueKind.True => true, + System.Text.Json.JsonValueKind.False => false, + System.Text.Json.JsonValueKind.Null => null, + _ => (object?)jsonElement.ToString() + } + : kvp.Value; + } + return arguments; + } + [LoggerMessage(Level = LogLevel.Information, Message = "AI tool call invoked: tool={ToolName} iteration={Iteration} user={EndUserId}")] private static partial void LogMcpToolCallInvoked(ILogger logger, string toolName, int iteration, string? endUserId); @@ -510,9 +603,57 @@ private bool IsMcpToolAllowed(string toolName) [LoggerMessage(Level = LogLevel.Warning, Message = "AI tool call rejected — not on allowlist: tool={ToolName} user={EndUserId}")] private static partial void LogMcpToolCallRejected(ILogger logger, string toolName, string? endUserId); + [LoggerMessage(Level = LogLevel.Warning, Message = "Failed to parse tool arguments for '{ToolName}': user={EndUserId}")] + private static partial void LogMcpToolArgumentParseError(ILogger logger, string toolName, Exception exception, string? endUserId); + [LoggerMessage(Level = LogLevel.Warning, Message = "MCP tool skipped during option setup — not on allowlist: tool={ToolName}")] private static partial void LogMcpToolSkippedNotAllowed(ILogger logger, string toolName); [LoggerMessage(Level = LogLevel.Information, Message = "AI contextual search performed for prompt enrichment")] private static partial void LogContextualSearchPerformed(ILogger logger); + + /// + /// Returns true when the API error indicates the conversation context window was exceeded. + /// Prefers structured JSON error code from the response body; falls back to message text matching. + /// Also handles HTTP 413 (payload too large via API gateway) and token_limit_exceeded. + /// + private static bool IsContextLengthError(ClientResultException ex) + { + if (ex.Status is not (400 or 413)) return false; + + // Prefer structured error code from the response body + var errorCode = TryExtractErrorCode(ex); + if (errorCode is not null) + return errorCode is "context_length_exceeded" or "token_limit_exceeded"; + + // Fallback: substring match on exception message (Azure OpenAI format may vary) + return ex.Message.Contains("context_length_exceeded", StringComparison.OrdinalIgnoreCase) || + ex.Message.Contains("maximum context length", StringComparison.OrdinalIgnoreCase) || + ex.Message.Contains("reduce the length of the messages", StringComparison.OrdinalIgnoreCase) || + ex.Message.Contains("token_limit_exceeded", StringComparison.OrdinalIgnoreCase); + } + + /// + /// Attempts to extract the error.code field from the raw JSON response body. + /// Returns null on any parse failure — this is best-effort. + /// + private static string? TryExtractErrorCode(ClientResultException ex) + { + try + { + var content = ex.GetRawResponse()?.Content; + if (content is null) return null; + using var doc = System.Text.Json.JsonDocument.Parse(content.ToMemory()); + if (doc.RootElement.TryGetProperty("error", out var error) && + error.TryGetProperty("code", out var code)) + return code.GetString(); + } + catch (Exception) + { + // Best-effort extraction inside an error handler — catch all to guarantee we never + // throw from error-parsing logic. The Azure SDK response internals are outside our + // control and the exception surface can change across SDK versions. + } + return null; + } } diff --git a/EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs b/EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs new file mode 100644 index 00000000..adc802bd --- /dev/null +++ b/EssentialCSharp.Chat.Shared/Services/ConversationContextLimitExceededException.cs @@ -0,0 +1,30 @@ +namespace EssentialCSharp.Chat.Common.Services; + +/// +/// Thrown when a conversation's accumulated context exceeds the model's context window limit. +/// +/// +/// This occurs when using previous_response_id chaining over many turns — the server +/// reconstructs the full history on each request, and that history eventually exceeds the model's +/// maximum input tokens. Callers should prompt the user to start a new conversation rather than +/// retrying with the same previousResponseId. +/// +public sealed class ConversationContextLimitExceededException : Exception +{ + /// + /// The previous_response_id that caused the overflow, if known. + /// + public string? PreviousResponseId { get; } + + public ConversationContextLimitExceededException(string? previousResponseId) + : base("This conversation has exceeded the model's context window limit.") + { + PreviousResponseId = previousResponseId; + } + + public ConversationContextLimitExceededException(string? previousResponseId, Exception innerException) + : base("This conversation has exceeded the model's context window limit.", innerException) + { + PreviousResponseId = previousResponseId; + } +} diff --git a/EssentialCSharp.Web/Controllers/ChatController.cs b/EssentialCSharp.Web/Controllers/ChatController.cs index f38138e6..0fcd8577 100644 --- a/EssentialCSharp.Web/Controllers/ChatController.cs +++ b/EssentialCSharp.Web/Controllers/ChatController.cs @@ -44,21 +44,28 @@ public async Task SendMessage([FromBody] ChatMessageRequest reque if (!_ResponseIdValidationService.ValidateResponseId(userId, previousResponseId)) return BadRequest(new { error = "Invalid conversation context." }); - var (response, responseId) = await _AiChatService.GetChatCompletion( - prompt: request.Message, - previousResponseId: previousResponseId, - enableContextualSearch: request.EnableContextualSearch, - endUserId: userId, - cancellationToken: cancellationToken); + try + { + var (response, responseId) = await _AiChatService.GetChatCompletion( + prompt: request.Message, + previousResponseId: previousResponseId, + enableContextualSearch: request.EnableContextualSearch, + endUserId: userId, + cancellationToken: cancellationToken); - _ResponseIdValidationService.RecordResponseId(userId, responseId); + _ResponseIdValidationService.RecordResponseId(userId, responseId); - return Ok(new ChatMessageResponse + return Ok(new ChatMessageResponse + { + Response = response, + ResponseId = responseId, + Timestamp = DateTime.UtcNow + }); + } + catch (ConversationContextLimitExceededException) { - Response = response, - ResponseId = responseId, - Timestamp = DateTime.UtcNow - }); + return BadRequest(new { error = "This conversation has grown too long. Please start a new one.", errorCode = "context_limit_exceeded" }); + } } [HttpPost("stream")] @@ -130,6 +137,28 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat { LogChatStreamCancelled(_Logger, User.Identity?.Name); } + catch (ConversationContextLimitExceededException) when (!Response.HasStarted) + { + Response.StatusCode = 400; + Response.ContentType = "application/json"; + await Response.WriteAsJsonAsync(new { error = "This conversation has grown too long. Please start a new one.", errorCode = "context_limit_exceeded" }, CancellationToken.None); + } + catch (ConversationContextLimitExceededException ex) + { + LogChatStreamErrorMidStream(_Logger, ex, User.Identity?.Name); + try + { + await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"This conversation has grown too long. Please start a new one.\",\"errorCode\":\"context_limit_exceeded\"}\n\n", CancellationToken.None); + await Response.Body.FlushAsync(CancellationToken.None); + } + catch (Exception) + { + // Best-effort write to an already-streaming response. Kestrel can throw + // IOException (connection reset), OperationCanceledException, or + // ObjectDisposedException on abrupt client disconnect — swallow all to + // avoid masking the original exception. + } + } catch (Exception ex) when (!Response.HasStarted) { LogChatStreamErrorBeforeResponseStarted(_Logger, ex, User.Identity?.Name); @@ -145,7 +174,13 @@ public async Task StreamMessage([FromBody] ChatMessageRequest request, Cancellat await Response.WriteAsync("data: {\"type\":\"error\",\"message\":\"Stream interrupted\"}\n\n", CancellationToken.None); await Response.Body.FlushAsync(CancellationToken.None); } - catch { /* client already disconnected */ } + catch (Exception) + { + // Best-effort write to an already-streaming response. Kestrel can throw + // IOException (connection reset), OperationCanceledException, or + // ObjectDisposedException on abrupt client disconnect — swallow all to + // avoid masking the original exception. + } } }